a bit of omptimization.
This commit was SVN r17528.
Этот коммит содержится в:
родитель
a0d12a9c92
Коммит
b9bb78484d
@ -107,16 +107,31 @@ BEGIN_C_DECLS
|
||||
*/
|
||||
typedef struct mca_coll_sm2_component_t mca_coll_sm2_component_t;
|
||||
|
||||
/* enum for node type */
|
||||
enum{
|
||||
ROOT_NODE,
|
||||
LEAF_NODE,
|
||||
INTERIOR_NODE
|
||||
};
|
||||
|
||||
|
||||
/*
|
||||
* N-order tree node description
|
||||
*/
|
||||
struct tree_node_t {
|
||||
/* my rank within the group */
|
||||
int my_rank;
|
||||
/* my node type - root, leaf, or interior */
|
||||
int my_node_type;
|
||||
/* number of nodes in the tree */
|
||||
int tree_size;
|
||||
/* number of parents (0/1) */
|
||||
int n_parents;
|
||||
/* number of children */
|
||||
int n_children;
|
||||
/* parent rank within the group */
|
||||
int parent_rank;
|
||||
/* chidren ranks within the group */
|
||||
int *children_ranks;
|
||||
};
|
||||
typedef struct tree_node_t tree_node_t;
|
||||
|
@ -83,54 +83,95 @@ int mca_coll_sm2_nbbarrier_intra(struct ompi_communicator_t *comm,
|
||||
request->tag=sm_module->nb_barrier_tag;
|
||||
tag=sm_module->nb_barrier_tag;
|
||||
|
||||
/*
|
||||
* Fan-in phase
|
||||
*/
|
||||
|
||||
/* check to see if children have checked in */
|
||||
cnt=0;
|
||||
for( child=0 ; child < sm_module->barrier_tree.n_children ; child++ ) {
|
||||
/* compute flag address */
|
||||
if( LEAF_NODE == sm_module->barrier_tree.my_node_type ) {
|
||||
/*
|
||||
* Fan-in phase
|
||||
*/
|
||||
|
||||
/* Set my completion flag */
|
||||
sm_address=(mca_coll_sm2_nb_request_process_shared_mem_t *)
|
||||
((char *)sm_barrier_region+
|
||||
sm_module->barrier_tree.children_ranks[child] *
|
||||
sm_module->segement_size_per_process);
|
||||
if(sm_address->flag == tag ) {
|
||||
/* child arrived */
|
||||
cnt++;
|
||||
} else {
|
||||
/* child not arrived, just break out */
|
||||
break;
|
||||
sm_module->barrier_tree.my_rank*
|
||||
sm_module->segement_size_per_process);
|
||||
sm_address->flag=tag;
|
||||
/* don't need memory barrier here, as we are not setting any other sm
|
||||
* data for someone else to read
|
||||
*/
|
||||
|
||||
/*
|
||||
* Fan-out phase
|
||||
*/
|
||||
|
||||
/*
|
||||
* check to see if parent has checked in
|
||||
*/
|
||||
if(sm_module->barrier_tree.n_parents > 0 ) {
|
||||
sm_address=(mca_coll_sm2_nb_request_process_shared_mem_t *)
|
||||
((char *)sm_barrier_region+
|
||||
sm_module->barrier_tree.parent_rank*
|
||||
sm_module->segement_size_per_process);
|
||||
if( sm_address->flag != -tag ) {
|
||||
/* if parent has not checked in - set parameters for async
|
||||
* completion, incomplet barrier flag, and bail
|
||||
*/
|
||||
request->sm2_barrier_phase=NB_BARRIER_FAN_OUT;
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* if children have not checked in - set paramenters for async
|
||||
* completion, incomplet barrier flag, and bail
|
||||
*/
|
||||
if( cnt != sm_module->barrier_tree.n_children ) {
|
||||
/* set restart parameters, and exit */
|
||||
request->sm2_barrier_phase=NB_BARRIER_FAN_IN;
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
/* Set my completion flag */
|
||||
sm_address=(mca_coll_sm2_nb_request_process_shared_mem_t *)
|
||||
((char *)sm_barrier_region+
|
||||
sm_module->barrier_tree.my_rank*
|
||||
sm_module->segement_size_per_process);
|
||||
sm_address->flag=tag;
|
||||
/* don't need memory barrier here, as we are not setting any other sm
|
||||
* data for someone else to read
|
||||
*/
|
||||
|
||||
/*
|
||||
* Fan-out phase
|
||||
*/
|
||||
|
||||
/*
|
||||
* check to see if parent has checked in
|
||||
*/
|
||||
if(sm_module->barrier_tree.n_parents > 0 ) {
|
||||
|
||||
/*
|
||||
* set my completion flag
|
||||
*/
|
||||
request->sm2_barrier_phase=NB_BARRIER_DONE;
|
||||
|
||||
} else if( INTERIOR_NODE == sm_module->barrier_tree.my_node_type ) {
|
||||
/*
|
||||
* Fan-in phase
|
||||
*/
|
||||
|
||||
/* check to see if children have checked in */
|
||||
cnt=0;
|
||||
for( child=0 ; child < sm_module->barrier_tree.n_children ; child++ ) {
|
||||
/* compute flag address */
|
||||
sm_address=(mca_coll_sm2_nb_request_process_shared_mem_t *)
|
||||
((char *)sm_barrier_region+
|
||||
sm_module->barrier_tree.children_ranks[child] *
|
||||
sm_module->segement_size_per_process);
|
||||
if(sm_address->flag == tag ) {
|
||||
/* child arrived */
|
||||
cnt++;
|
||||
} else {
|
||||
/* child not arrived, just break out */
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* if children have not checked in - set paramenters for async
|
||||
* completion, incomplet barrier flag, and bail
|
||||
*/
|
||||
if( cnt != sm_module->barrier_tree.n_children ) {
|
||||
/* set restart parameters, and exit */
|
||||
request->sm2_barrier_phase=NB_BARRIER_FAN_IN;
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
/* Set my completion flag */
|
||||
sm_address=(mca_coll_sm2_nb_request_process_shared_mem_t *)
|
||||
((char *)sm_barrier_region+
|
||||
sm_module->barrier_tree.my_rank*
|
||||
sm_module->segement_size_per_process);
|
||||
sm_address->flag=tag;
|
||||
/* don't need memory barrier here, as we are not setting any other sm
|
||||
* data for someone else to read
|
||||
*/
|
||||
|
||||
/*
|
||||
* Fan-out phase
|
||||
*/
|
||||
|
||||
/*
|
||||
* check to see if parent has checked in
|
||||
*/
|
||||
sm_address=(mca_coll_sm2_nb_request_process_shared_mem_t *)
|
||||
((char *)sm_barrier_region+
|
||||
sm_module->barrier_tree.parent_rank*
|
||||
@ -142,23 +183,64 @@ int mca_coll_sm2_nbbarrier_intra(struct ompi_communicator_t *comm,
|
||||
request->sm2_barrier_phase=NB_BARRIER_FAN_OUT;
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
sm_address=(mca_coll_sm2_nb_request_process_shared_mem_t *)
|
||||
((char *)sm_barrier_region+
|
||||
sm_module->barrier_tree.my_rank *
|
||||
sm_module->segement_size_per_process);
|
||||
sm_address->flag=-tag;
|
||||
|
||||
/*
|
||||
* set my completion flag
|
||||
*/
|
||||
request->sm2_barrier_phase=NB_BARRIER_DONE;
|
||||
|
||||
|
||||
} else {
|
||||
/* root node */
|
||||
/*
|
||||
* Fan-in phase
|
||||
*/
|
||||
|
||||
/* check to see if children have checked in */
|
||||
cnt=0;
|
||||
for( child=0 ; child < sm_module->barrier_tree.n_children ; child++ ) {
|
||||
/* compute flag address */
|
||||
sm_address=(mca_coll_sm2_nb_request_process_shared_mem_t *)
|
||||
((char *)sm_barrier_region+
|
||||
sm_module->barrier_tree.children_ranks[child] *
|
||||
sm_module->segement_size_per_process);
|
||||
if(sm_address->flag == tag ) {
|
||||
/* child arrived */
|
||||
cnt++;
|
||||
} else {
|
||||
/* child not arrived, just break out */
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* if children have not checked in - set paramenters for async
|
||||
* completion, incomplet barrier flag, and bail
|
||||
*/
|
||||
if( cnt != sm_module->barrier_tree.n_children ) {
|
||||
/* set restart parameters, and exit */
|
||||
request->sm2_barrier_phase=NB_BARRIER_FAN_IN;
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
/* Set my fan-out flag */
|
||||
sm_address=(mca_coll_sm2_nb_request_process_shared_mem_t *)
|
||||
((char *)sm_barrier_region+
|
||||
sm_module->barrier_tree.my_rank*
|
||||
sm_module->segement_size_per_process);
|
||||
sm_address->flag=-tag;
|
||||
|
||||
/*
|
||||
* set my completion flag
|
||||
*/
|
||||
request->sm2_barrier_phase=NB_BARRIER_DONE;
|
||||
|
||||
}
|
||||
|
||||
sm_address=(mca_coll_sm2_nb_request_process_shared_mem_t *)
|
||||
((char *)sm_barrier_region+
|
||||
sm_module->barrier_tree.my_rank *
|
||||
sm_module->segement_size_per_process);
|
||||
sm_address->flag=-tag;
|
||||
|
||||
/*
|
||||
* set my completion flag
|
||||
*/
|
||||
request->sm2_barrier_phase=NB_BARRIER_DONE;
|
||||
|
||||
/*
|
||||
* set barrier completion flag
|
||||
*/
|
||||
|
||||
/* return - successful completion */
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
@ -188,63 +270,27 @@ int mca_coll_sm2_nbbarrier_intra_progress(struct ompi_communicator_t *comm,
|
||||
sm_module=(mca_coll_sm2_module_t *)module;
|
||||
tag=request->tag;
|
||||
|
||||
phase=request->sm2_barrier_phase;
|
||||
if( NB_BARRIER_FAN_OUT == phase ) {
|
||||
goto FANOUT;
|
||||
} else if ( (NB_BARRIER_DONE == phase) || (NB_BARRIER_INACTIVE == phase) ) {
|
||||
goto DONE;
|
||||
}
|
||||
/* defult - NB_BARRIER_FAN_IN */
|
||||
|
||||
/*
|
||||
* Fan-in phase
|
||||
*/
|
||||
|
||||
/* check to see if children have checked in */
|
||||
cnt=0;
|
||||
for( child=0 ; child < sm_module->barrier_tree.n_children ; child++ ) {
|
||||
/* compute flag address */
|
||||
sm_address=(mca_coll_sm2_nb_request_process_shared_mem_t *)
|
||||
((char *)sm_barrier_region+
|
||||
sm_module->barrier_tree.children_ranks[child] *
|
||||
sm_module->segement_size_per_process);
|
||||
if(sm_address->flag == tag ) {
|
||||
/* child arrived */
|
||||
cnt++;
|
||||
} else {
|
||||
/* child not arrived, just break out */
|
||||
break;
|
||||
if( LEAF_NODE == sm_module->barrier_tree.my_node_type ) {
|
||||
phase=request->sm2_barrier_phase;
|
||||
if( NB_BARRIER_FAN_OUT == phase ) {
|
||||
goto FANOUT_LEAF;
|
||||
} else if ( (NB_BARRIER_DONE == phase) || (NB_BARRIER_INACTIVE == phase) ) {
|
||||
goto DONE;
|
||||
}
|
||||
}
|
||||
|
||||
/* if children have not checked in - set paramenters for async
|
||||
* completion, incomplet barrier flag, and bail
|
||||
*/
|
||||
if( cnt != sm_module->barrier_tree.n_children ) {
|
||||
/* set restart parameters, and exit */
|
||||
request->sm2_barrier_phase=NB_BARRIER_FAN_IN;
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
/* Set my completion flag */
|
||||
sm_address=(mca_coll_sm2_nb_request_process_shared_mem_t *)
|
||||
((char *)sm_barrier_region+
|
||||
sm_module->barrier_tree.my_rank *
|
||||
sm_module->segement_size_per_process);
|
||||
sm_address->flag=tag;
|
||||
/* don't need memory barrier here, as we are not setting any other sm
|
||||
* data for someone else to read
|
||||
*/
|
||||
|
||||
FANOUT:
|
||||
/*
|
||||
* Fan-out phase
|
||||
*/
|
||||
|
||||
/*
|
||||
* check to see if parent has checked in
|
||||
*/
|
||||
if(sm_module->barrier_tree.n_parents > 0 ) {
|
||||
/* defult - NB_BARRIER_FAN_IN */
|
||||
|
||||
/*
|
||||
* Fan-in phase
|
||||
*/
|
||||
|
||||
FANOUT_LEAF:
|
||||
/*
|
||||
* Fan-out phase
|
||||
*/
|
||||
|
||||
/*
|
||||
* check to see if parent has checked in
|
||||
*/
|
||||
sm_address=(mca_coll_sm2_nb_request_process_shared_mem_t *)
|
||||
((char *)sm_barrier_region+
|
||||
sm_module->barrier_tree.parent_rank*
|
||||
@ -256,23 +302,141 @@ FANOUT:
|
||||
request->sm2_barrier_phase=NB_BARRIER_FAN_OUT;
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
/*
|
||||
* set my completion flag
|
||||
*/
|
||||
request->sm2_barrier_phase=NB_BARRIER_DONE;
|
||||
} else if( INTERIOR_NODE == sm_module->barrier_tree.my_node_type ) {
|
||||
phase=request->sm2_barrier_phase;
|
||||
if( NB_BARRIER_FAN_OUT == phase ) {
|
||||
goto FANOUT_INTERIOR;
|
||||
} else if ( (NB_BARRIER_DONE == phase) || (NB_BARRIER_INACTIVE == phase) ) {
|
||||
goto DONE;
|
||||
}
|
||||
/* defult - NB_BARRIER_FAN_IN */
|
||||
|
||||
/*
|
||||
* Fan-in phase
|
||||
*/
|
||||
|
||||
/* check to see if children have checked in */
|
||||
cnt=0;
|
||||
for( child=0 ; child < sm_module->barrier_tree.n_children ; child++ ) {
|
||||
/* compute flag address */
|
||||
sm_address=(mca_coll_sm2_nb_request_process_shared_mem_t *)
|
||||
((char *)sm_barrier_region+
|
||||
sm_module->barrier_tree.children_ranks[child] *
|
||||
sm_module->segement_size_per_process);
|
||||
if(sm_address->flag == tag ) {
|
||||
/* child arrived */
|
||||
cnt++;
|
||||
} else {
|
||||
/* child not arrived, just break out */
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* if children have not checked in - set paramenters for async
|
||||
* completion, incomplet barrier flag, and bail
|
||||
*/
|
||||
if( cnt != sm_module->barrier_tree.n_children ) {
|
||||
/* set restart parameters, and exit */
|
||||
request->sm2_barrier_phase=NB_BARRIER_FAN_IN;
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
/* Set my completion flag */
|
||||
sm_address=(mca_coll_sm2_nb_request_process_shared_mem_t *)
|
||||
((char *)sm_barrier_region+
|
||||
sm_module->barrier_tree.my_rank *
|
||||
sm_module->segement_size_per_process);
|
||||
sm_address->flag=tag;
|
||||
/* don't need memory barrier here, as we are not setting any other sm
|
||||
* data for someone else to read
|
||||
*/
|
||||
|
||||
FANOUT_INTERIOR:
|
||||
/*
|
||||
* Fan-out phase
|
||||
*/
|
||||
|
||||
/*
|
||||
* check to see if parent has checked in
|
||||
*/
|
||||
sm_address=(mca_coll_sm2_nb_request_process_shared_mem_t *)
|
||||
((char *)sm_barrier_region+
|
||||
sm_module->barrier_tree.parent_rank*
|
||||
sm_module->segement_size_per_process);
|
||||
if( sm_address->flag != -tag ) {
|
||||
/* if parent has not checked in - set parameters for async
|
||||
* completion, incomplet barrier flag, and bail
|
||||
*/
|
||||
request->sm2_barrier_phase=NB_BARRIER_FAN_OUT;
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
sm_address=(mca_coll_sm2_nb_request_process_shared_mem_t *)
|
||||
((char *)sm_barrier_region+
|
||||
sm_module->barrier_tree.my_rank *
|
||||
sm_module->segement_size_per_process);
|
||||
sm_address->flag=-tag;
|
||||
|
||||
/*
|
||||
* set my completion flag
|
||||
*/
|
||||
request->sm2_barrier_phase=NB_BARRIER_DONE;
|
||||
} else {
|
||||
/* root node */
|
||||
phase=request->sm2_barrier_phase;
|
||||
if ( (NB_BARRIER_DONE == phase) || (NB_BARRIER_INACTIVE == phase) ) {
|
||||
goto DONE;
|
||||
}
|
||||
/* defult - NB_BARRIER_FAN_IN */
|
||||
|
||||
/*
|
||||
* Fan-in phase
|
||||
*/
|
||||
|
||||
/* check to see if children have checked in */
|
||||
cnt=0;
|
||||
for( child=0 ; child < sm_module->barrier_tree.n_children ; child++ ) {
|
||||
/* compute flag address */
|
||||
sm_address=(mca_coll_sm2_nb_request_process_shared_mem_t *)
|
||||
((char *)sm_barrier_region+
|
||||
sm_module->barrier_tree.children_ranks[child] *
|
||||
sm_module->segement_size_per_process);
|
||||
if(sm_address->flag == tag ) {
|
||||
/* child arrived */
|
||||
cnt++;
|
||||
} else {
|
||||
/* child not arrived, just break out */
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* if children have not checked in - set paramenters for async
|
||||
* completion, incomplet barrier flag, and bail
|
||||
*/
|
||||
if( cnt != sm_module->barrier_tree.n_children ) {
|
||||
/* set restart parameters, and exit */
|
||||
request->sm2_barrier_phase=NB_BARRIER_FAN_IN;
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
/* Set my completion flag */
|
||||
sm_address=(mca_coll_sm2_nb_request_process_shared_mem_t *)
|
||||
((char *)sm_barrier_region+
|
||||
sm_module->barrier_tree.my_rank *
|
||||
sm_module->segement_size_per_process);
|
||||
sm_address->flag=-tag;
|
||||
|
||||
/*
|
||||
* set my completion flag
|
||||
*/
|
||||
request->sm2_barrier_phase=NB_BARRIER_DONE;
|
||||
}
|
||||
|
||||
sm_address=(mca_coll_sm2_nb_request_process_shared_mem_t *)
|
||||
((char *)sm_barrier_region+
|
||||
sm_module->barrier_tree.my_rank *
|
||||
sm_module->segement_size_per_process);
|
||||
sm_address->flag=-tag;
|
||||
|
||||
/*
|
||||
* set my completion flag
|
||||
*/
|
||||
request->sm2_barrier_phase=NB_BARRIER_DONE;
|
||||
|
||||
/*
|
||||
* set barrier completion flag
|
||||
*/
|
||||
|
||||
|
||||
DONE:
|
||||
/* return - successful completion */
|
||||
return OMPI_SUCCESS;
|
||||
|
@ -336,6 +336,15 @@ static int setup_nary_tree(int tree_order, int my_rank, int num_nodes,
|
||||
}
|
||||
}
|
||||
}
|
||||
/* set node type */
|
||||
if( 0 == my_node->n_parents ) {
|
||||
my_node->my_node_type=ROOT_NODE;
|
||||
} else if ( 0 == my_node->n_children ) {
|
||||
my_node->my_node_type=LEAF_NODE;
|
||||
} else {
|
||||
my_node->my_node_type=INTERIOR_NODE;
|
||||
}
|
||||
|
||||
|
||||
/* successful return */
|
||||
return OMPI_SUCCESS;
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user