a bit of optimization.
This commit was SVN r17528.
Этот коммит содержится в:
родитель
a0d12a9c92
Коммит
b9bb78484d
@ -107,16 +107,31 @@ BEGIN_C_DECLS
|
|||||||
*/
|
*/
|
||||||
typedef struct mca_coll_sm2_component_t mca_coll_sm2_component_t;
|
typedef struct mca_coll_sm2_component_t mca_coll_sm2_component_t;
|
||||||
|
|
||||||
|
/* enum for node type */
|
||||||
|
enum{
|
||||||
|
ROOT_NODE,
|
||||||
|
LEAF_NODE,
|
||||||
|
INTERIOR_NODE
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* N-order tree node description
|
* N-order tree node description
|
||||||
*/
|
*/
|
||||||
struct tree_node_t {
|
struct tree_node_t {
|
||||||
|
/* my rank within the group */
|
||||||
int my_rank;
|
int my_rank;
|
||||||
|
/* my node type - root, leaf, or interior */
|
||||||
|
int my_node_type;
|
||||||
|
/* number of nodes in the tree */
|
||||||
int tree_size;
|
int tree_size;
|
||||||
|
/* number of parents (0/1) */
|
||||||
int n_parents;
|
int n_parents;
|
||||||
|
/* number of children */
|
||||||
int n_children;
|
int n_children;
|
||||||
|
/* parent rank within the group */
|
||||||
int parent_rank;
|
int parent_rank;
|
||||||
|
/* children ranks within the group */
|
||||||
int *children_ranks;
|
int *children_ranks;
|
||||||
};
|
};
|
||||||
typedef struct tree_node_t tree_node_t;
|
typedef struct tree_node_t tree_node_t;
|
||||||
|
@ -83,6 +83,48 @@ int mca_coll_sm2_nbbarrier_intra(struct ompi_communicator_t *comm,
|
|||||||
request->tag=sm_module->nb_barrier_tag;
|
request->tag=sm_module->nb_barrier_tag;
|
||||||
tag=sm_module->nb_barrier_tag;
|
tag=sm_module->nb_barrier_tag;
|
||||||
|
|
||||||
|
if( LEAF_NODE == sm_module->barrier_tree.my_node_type ) {
|
||||||
|
/*
|
||||||
|
* Fan-in phase
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* Set my completion flag */
|
||||||
|
sm_address=(mca_coll_sm2_nb_request_process_shared_mem_t *)
|
||||||
|
((char *)sm_barrier_region+
|
||||||
|
sm_module->barrier_tree.my_rank*
|
||||||
|
sm_module->segement_size_per_process);
|
||||||
|
sm_address->flag=tag;
|
||||||
|
/* don't need memory barrier here, as we are not setting any other sm
|
||||||
|
* data for someone else to read
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Fan-out phase
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* check to see if parent has checked in
|
||||||
|
*/
|
||||||
|
if(sm_module->barrier_tree.n_parents > 0 ) {
|
||||||
|
sm_address=(mca_coll_sm2_nb_request_process_shared_mem_t *)
|
||||||
|
((char *)sm_barrier_region+
|
||||||
|
sm_module->barrier_tree.parent_rank*
|
||||||
|
sm_module->segement_size_per_process);
|
||||||
|
if( sm_address->flag != -tag ) {
|
||||||
|
/* if parent has not checked in - set parameters for async
|
||||||
|
* completion, incomplete barrier flag, and bail
|
||||||
|
*/
|
||||||
|
request->sm2_barrier_phase=NB_BARRIER_FAN_OUT;
|
||||||
|
return OMPI_SUCCESS;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* set my completion flag
|
||||||
|
*/
|
||||||
|
request->sm2_barrier_phase=NB_BARRIER_DONE;
|
||||||
|
|
||||||
|
} else if( INTERIOR_NODE == sm_module->barrier_tree.my_node_type ) {
|
||||||
/*
|
/*
|
||||||
* Fan-in phase
|
* Fan-in phase
|
||||||
*/
|
*/
|
||||||
@ -130,7 +172,6 @@ int mca_coll_sm2_nbbarrier_intra(struct ompi_communicator_t *comm,
|
|||||||
/*
|
/*
|
||||||
* check to see if parent has checked in
|
* check to see if parent has checked in
|
||||||
*/
|
*/
|
||||||
if(sm_module->barrier_tree.n_parents > 0 ) {
|
|
||||||
sm_address=(mca_coll_sm2_nb_request_process_shared_mem_t *)
|
sm_address=(mca_coll_sm2_nb_request_process_shared_mem_t *)
|
||||||
((char *)sm_barrier_region+
|
((char *)sm_barrier_region+
|
||||||
sm_module->barrier_tree.parent_rank*
|
sm_module->barrier_tree.parent_rank*
|
||||||
@ -142,7 +183,6 @@ int mca_coll_sm2_nbbarrier_intra(struct ompi_communicator_t *comm,
|
|||||||
request->sm2_barrier_phase=NB_BARRIER_FAN_OUT;
|
request->sm2_barrier_phase=NB_BARRIER_FAN_OUT;
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
sm_address=(mca_coll_sm2_nb_request_process_shared_mem_t *)
|
sm_address=(mca_coll_sm2_nb_request_process_shared_mem_t *)
|
||||||
((char *)sm_barrier_region+
|
((char *)sm_barrier_region+
|
||||||
@ -155,10 +195,52 @@ int mca_coll_sm2_nbbarrier_intra(struct ompi_communicator_t *comm,
|
|||||||
*/
|
*/
|
||||||
request->sm2_barrier_phase=NB_BARRIER_DONE;
|
request->sm2_barrier_phase=NB_BARRIER_DONE;
|
||||||
|
|
||||||
|
|
||||||
|
} else {
|
||||||
|
/* root node */
|
||||||
/*
|
/*
|
||||||
* set barrier completion flag
|
* Fan-in phase
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
/* check to see if children have checked in */
|
||||||
|
cnt=0;
|
||||||
|
for( child=0 ; child < sm_module->barrier_tree.n_children ; child++ ) {
|
||||||
|
/* compute flag address */
|
||||||
|
sm_address=(mca_coll_sm2_nb_request_process_shared_mem_t *)
|
||||||
|
((char *)sm_barrier_region+
|
||||||
|
sm_module->barrier_tree.children_ranks[child] *
|
||||||
|
sm_module->segement_size_per_process);
|
||||||
|
if(sm_address->flag == tag ) {
|
||||||
|
/* child arrived */
|
||||||
|
cnt++;
|
||||||
|
} else {
|
||||||
|
/* child not arrived, just break out */
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* if children have not checked in - set parameters for async
|
||||||
|
* completion, incomplete barrier flag, and bail
|
||||||
|
*/
|
||||||
|
if( cnt != sm_module->barrier_tree.n_children ) {
|
||||||
|
/* set restart parameters, and exit */
|
||||||
|
request->sm2_barrier_phase=NB_BARRIER_FAN_IN;
|
||||||
|
return OMPI_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Set my fan-out flag */
|
||||||
|
sm_address=(mca_coll_sm2_nb_request_process_shared_mem_t *)
|
||||||
|
((char *)sm_barrier_region+
|
||||||
|
sm_module->barrier_tree.my_rank*
|
||||||
|
sm_module->segement_size_per_process);
|
||||||
|
sm_address->flag=-tag;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* set my completion flag
|
||||||
|
*/
|
||||||
|
request->sm2_barrier_phase=NB_BARRIER_DONE;
|
||||||
|
|
||||||
|
}
|
||||||
/* return - successful completion */
|
/* return - successful completion */
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
}
|
}
|
||||||
@ -188,9 +270,47 @@ int mca_coll_sm2_nbbarrier_intra_progress(struct ompi_communicator_t *comm,
|
|||||||
sm_module=(mca_coll_sm2_module_t *)module;
|
sm_module=(mca_coll_sm2_module_t *)module;
|
||||||
tag=request->tag;
|
tag=request->tag;
|
||||||
|
|
||||||
|
if( LEAF_NODE == sm_module->barrier_tree.my_node_type ) {
|
||||||
phase=request->sm2_barrier_phase;
|
phase=request->sm2_barrier_phase;
|
||||||
if( NB_BARRIER_FAN_OUT == phase ) {
|
if( NB_BARRIER_FAN_OUT == phase ) {
|
||||||
goto FANOUT;
|
goto FANOUT_LEAF;
|
||||||
|
} else if ( (NB_BARRIER_DONE == phase) || (NB_BARRIER_INACTIVE == phase) ) {
|
||||||
|
goto DONE;
|
||||||
|
}
|
||||||
|
/* default - NB_BARRIER_FAN_IN */
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Fan-in phase
|
||||||
|
*/
|
||||||
|
|
||||||
|
FANOUT_LEAF:
|
||||||
|
/*
|
||||||
|
* Fan-out phase
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* check to see if parent has checked in
|
||||||
|
*/
|
||||||
|
sm_address=(mca_coll_sm2_nb_request_process_shared_mem_t *)
|
||||||
|
((char *)sm_barrier_region+
|
||||||
|
sm_module->barrier_tree.parent_rank*
|
||||||
|
sm_module->segement_size_per_process);
|
||||||
|
if( sm_address->flag != -tag ) {
|
||||||
|
/* if parent has not checked in - set parameters for async
|
||||||
|
* completion, incomplete barrier flag, and bail
|
||||||
|
*/
|
||||||
|
request->sm2_barrier_phase=NB_BARRIER_FAN_OUT;
|
||||||
|
return OMPI_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* set my completion flag
|
||||||
|
*/
|
||||||
|
request->sm2_barrier_phase=NB_BARRIER_DONE;
|
||||||
|
} else if( INTERIOR_NODE == sm_module->barrier_tree.my_node_type ) {
|
||||||
|
phase=request->sm2_barrier_phase;
|
||||||
|
if( NB_BARRIER_FAN_OUT == phase ) {
|
||||||
|
goto FANOUT_INTERIOR;
|
||||||
} else if ( (NB_BARRIER_DONE == phase) || (NB_BARRIER_INACTIVE == phase) ) {
|
} else if ( (NB_BARRIER_DONE == phase) || (NB_BARRIER_INACTIVE == phase) ) {
|
||||||
goto DONE;
|
goto DONE;
|
||||||
}
|
}
|
||||||
@ -236,7 +356,7 @@ int mca_coll_sm2_nbbarrier_intra_progress(struct ompi_communicator_t *comm,
|
|||||||
* data for someone else to read
|
* data for someone else to read
|
||||||
*/
|
*/
|
||||||
|
|
||||||
FANOUT:
|
FANOUT_INTERIOR:
|
||||||
/*
|
/*
|
||||||
* Fan-out phase
|
* Fan-out phase
|
||||||
*/
|
*/
|
||||||
@ -244,7 +364,6 @@ FANOUT:
|
|||||||
/*
|
/*
|
||||||
* check to see if parent has checked in
|
* check to see if parent has checked in
|
||||||
*/
|
*/
|
||||||
if(sm_module->barrier_tree.n_parents > 0 ) {
|
|
||||||
sm_address=(mca_coll_sm2_nb_request_process_shared_mem_t *)
|
sm_address=(mca_coll_sm2_nb_request_process_shared_mem_t *)
|
||||||
((char *)sm_barrier_region+
|
((char *)sm_barrier_region+
|
||||||
sm_module->barrier_tree.parent_rank*
|
sm_module->barrier_tree.parent_rank*
|
||||||
@ -256,7 +375,6 @@ FANOUT:
|
|||||||
request->sm2_barrier_phase=NB_BARRIER_FAN_OUT;
|
request->sm2_barrier_phase=NB_BARRIER_FAN_OUT;
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
sm_address=(mca_coll_sm2_nb_request_process_shared_mem_t *)
|
sm_address=(mca_coll_sm2_nb_request_process_shared_mem_t *)
|
||||||
((char *)sm_barrier_region+
|
((char *)sm_barrier_region+
|
||||||
@ -268,11 +386,57 @@ FANOUT:
|
|||||||
* set my completion flag
|
* set my completion flag
|
||||||
*/
|
*/
|
||||||
request->sm2_barrier_phase=NB_BARRIER_DONE;
|
request->sm2_barrier_phase=NB_BARRIER_DONE;
|
||||||
|
} else {
|
||||||
|
/* root node */
|
||||||
|
phase=request->sm2_barrier_phase;
|
||||||
|
if ( (NB_BARRIER_DONE == phase) || (NB_BARRIER_INACTIVE == phase) ) {
|
||||||
|
goto DONE;
|
||||||
|
}
|
||||||
|
/* default - NB_BARRIER_FAN_IN */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* set barrier completion flag
|
* Fan-in phase
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
/* check to see if children have checked in */
|
||||||
|
cnt=0;
|
||||||
|
for( child=0 ; child < sm_module->barrier_tree.n_children ; child++ ) {
|
||||||
|
/* compute flag address */
|
||||||
|
sm_address=(mca_coll_sm2_nb_request_process_shared_mem_t *)
|
||||||
|
((char *)sm_barrier_region+
|
||||||
|
sm_module->barrier_tree.children_ranks[child] *
|
||||||
|
sm_module->segement_size_per_process);
|
||||||
|
if(sm_address->flag == tag ) {
|
||||||
|
/* child arrived */
|
||||||
|
cnt++;
|
||||||
|
} else {
|
||||||
|
/* child not arrived, just break out */
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* if children have not checked in - set parameters for async
|
||||||
|
* completion, incomplete barrier flag, and bail
|
||||||
|
*/
|
||||||
|
if( cnt != sm_module->barrier_tree.n_children ) {
|
||||||
|
/* set restart parameters, and exit */
|
||||||
|
request->sm2_barrier_phase=NB_BARRIER_FAN_IN;
|
||||||
|
return OMPI_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Set my fan-out flag */
|
||||||
|
sm_address=(mca_coll_sm2_nb_request_process_shared_mem_t *)
|
||||||
|
((char *)sm_barrier_region+
|
||||||
|
sm_module->barrier_tree.my_rank *
|
||||||
|
sm_module->segement_size_per_process);
|
||||||
|
sm_address->flag=-tag;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* set my completion flag
|
||||||
|
*/
|
||||||
|
request->sm2_barrier_phase=NB_BARRIER_DONE;
|
||||||
|
}
|
||||||
|
|
||||||
DONE:
|
DONE:
|
||||||
/* return - successful completion */
|
/* return - successful completion */
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
|
@ -336,6 +336,15 @@ static int setup_nary_tree(int tree_order, int my_rank, int num_nodes,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
/* set node type */
|
||||||
|
if( 0 == my_node->n_parents ) {
|
||||||
|
my_node->my_node_type=ROOT_NODE;
|
||||||
|
} else if ( 0 == my_node->n_children ) {
|
||||||
|
my_node->my_node_type=LEAF_NODE;
|
||||||
|
} else {
|
||||||
|
my_node->my_node_type=INTERIOR_NODE;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/* successful return */
|
/* successful return */
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user