From b9bb78484d535892711e24f30b311cfbe59942a0 Mon Sep 17 00:00:00 2001 From: Rich Graham Date: Wed, 20 Feb 2008 16:19:49 +0000 Subject: [PATCH] a bit of omptimization. This commit was SVN r17528. --- ompi/mca/coll/sm2/coll_sm2.h | 15 + ompi/mca/coll/sm2/coll_sm2_barrier.c | 430 ++++++++++++++++++--------- ompi/mca/coll/sm2/coll_sm2_module.c | 9 + 3 files changed, 321 insertions(+), 133 deletions(-) diff --git a/ompi/mca/coll/sm2/coll_sm2.h b/ompi/mca/coll/sm2/coll_sm2.h index c17085a85f..d42520ad6c 100644 --- a/ompi/mca/coll/sm2/coll_sm2.h +++ b/ompi/mca/coll/sm2/coll_sm2.h @@ -107,16 +107,31 @@ BEGIN_C_DECLS */ typedef struct mca_coll_sm2_component_t mca_coll_sm2_component_t; + /* enum for node type */ + enum{ + ROOT_NODE, + LEAF_NODE, + INTERIOR_NODE + }; + /* * N-order tree node description */ struct tree_node_t { + /* my rank within the group */ int my_rank; + /* my node type - root, leaf, or interior */ + int my_node_type; + /* number of nodes in the tree */ int tree_size; + /* number of parents (0/1) */ int n_parents; + /* number of children */ int n_children; + /* parent rank within the group */ int parent_rank; + /* chidren ranks within the group */ int *children_ranks; }; typedef struct tree_node_t tree_node_t; diff --git a/ompi/mca/coll/sm2/coll_sm2_barrier.c b/ompi/mca/coll/sm2/coll_sm2_barrier.c index f233c02763..95c2464c8c 100644 --- a/ompi/mca/coll/sm2/coll_sm2_barrier.c +++ b/ompi/mca/coll/sm2/coll_sm2_barrier.c @@ -83,54 +83,95 @@ int mca_coll_sm2_nbbarrier_intra(struct ompi_communicator_t *comm, request->tag=sm_module->nb_barrier_tag; tag=sm_module->nb_barrier_tag; - /* - * Fan-in phase - */ - - /* check to see if children have checked in */ - cnt=0; - for( child=0 ; child < sm_module->barrier_tree.n_children ; child++ ) { - /* compute flag address */ + if( LEAF_NODE == sm_module->barrier_tree.my_node_type ) { + /* + * Fan-in phase + */ + + /* Set my completion flag */ sm_address=(mca_coll_sm2_nb_request_process_shared_mem_t *) ((char *)sm_barrier_region+ - sm_module->barrier_tree.children_ranks[child] * - sm_module->segement_size_per_process); - if(sm_address->flag == tag ) { - /* child arrived */ - cnt++; - } else { - /* child not arrived, just break out */ - break; + sm_module->barrier_tree.my_rank* + sm_module->segement_size_per_process); + sm_address->flag=tag; + /* don't need memory barrier here, as we are not setting any other sm + * data for someone else to read + */ + + /* + * Fan-out phase + */ + + /* + * check to see if parent has checked in + */ + if(sm_module->barrier_tree.n_parents > 0 ) { + sm_address=(mca_coll_sm2_nb_request_process_shared_mem_t *) + ((char *)sm_barrier_region+ + sm_module->barrier_tree.parent_rank* + sm_module->segement_size_per_process); + if( sm_address->flag != -tag ) { + /* if parent has not checked in - set parameters for async + * completion, incomplet barrier flag, and bail + */ + request->sm2_barrier_phase=NB_BARRIER_FAN_OUT; + return OMPI_SUCCESS; + } } - } - - /* if children have not checked in - set paramenters for async - * completion, incomplet barrier flag, and bail - */ - if( cnt != sm_module->barrier_tree.n_children ) { - /* set restart parameters, and exit */ - request->sm2_barrier_phase=NB_BARRIER_FAN_IN; - return OMPI_SUCCESS; - } - - /* Set my completion flag */ - sm_address=(mca_coll_sm2_nb_request_process_shared_mem_t *) - ((char *)sm_barrier_region+ - sm_module->barrier_tree.my_rank* - sm_module->segement_size_per_process); - sm_address->flag=tag; - /* don't need memory barrier here, as we are not setting any other sm - * data for someone else to read - */ - - /* - * Fan-out phase - */ - - /* - * check to see if parent has checked in - */ - if(sm_module->barrier_tree.n_parents > 0 ) { + + /* + * set my completion flag + */ + request->sm2_barrier_phase=NB_BARRIER_DONE; + + } else if( INTERIOR_NODE == sm_module->barrier_tree.my_node_type ) { + /* + * Fan-in phase + */ + + /* check to see if children have checked in */ + cnt=0; + for( child=0 ; child < sm_module->barrier_tree.n_children ; child++ ) { + /* compute flag address */ + sm_address=(mca_coll_sm2_nb_request_process_shared_mem_t *) + ((char *)sm_barrier_region+ + sm_module->barrier_tree.children_ranks[child] * + sm_module->segement_size_per_process); + if(sm_address->flag == tag ) { + /* child arrived */ + cnt++; + } else { + /* child not arrived, just break out */ + break; + } + } + + /* if children have not checked in - set paramenters for async + * completion, incomplet barrier flag, and bail + */ + if( cnt != sm_module->barrier_tree.n_children ) { + /* set restart parameters, and exit */ + request->sm2_barrier_phase=NB_BARRIER_FAN_IN; + return OMPI_SUCCESS; + } + + /* Set my completion flag */ + sm_address=(mca_coll_sm2_nb_request_process_shared_mem_t *) + ((char *)sm_barrier_region+ + sm_module->barrier_tree.my_rank* + sm_module->segement_size_per_process); + sm_address->flag=tag; + /* don't need memory barrier here, as we are not setting any other sm + * data for someone else to read + */ + + /* + * Fan-out phase + */ + + /* + * check to see if parent has checked in + */ sm_address=(mca_coll_sm2_nb_request_process_shared_mem_t *) ((char *)sm_barrier_region+ sm_module->barrier_tree.parent_rank* @@ -142,23 +183,64 @@ int mca_coll_sm2_nbbarrier_intra(struct ompi_communicator_t *comm, request->sm2_barrier_phase=NB_BARRIER_FAN_OUT; return OMPI_SUCCESS; } + + sm_address=(mca_coll_sm2_nb_request_process_shared_mem_t *) + ((char *)sm_barrier_region+ + sm_module->barrier_tree.my_rank * + sm_module->segement_size_per_process); + sm_address->flag=-tag; + + /* + * set my completion flag + */ + request->sm2_barrier_phase=NB_BARRIER_DONE; + + + } else { + /* root node */ + /* + * Fan-in phase + */ + + /* check to see if children have checked in */ + cnt=0; + for( child=0 ; child < sm_module->barrier_tree.n_children ; child++ ) { + /* compute flag address */ + sm_address=(mca_coll_sm2_nb_request_process_shared_mem_t *) + ((char *)sm_barrier_region+ + sm_module->barrier_tree.children_ranks[child] * + sm_module->segement_size_per_process); + if(sm_address->flag == tag ) { + /* child arrived */ + cnt++; + } else { + /* child not arrived, just break out */ + break; + } + } + + /* if children have not checked in - set paramenters for async + * completion, incomplet barrier flag, and bail + */ + if( cnt != sm_module->barrier_tree.n_children ) { + /* set restart parameters, and exit */ + request->sm2_barrier_phase=NB_BARRIER_FAN_IN; + return OMPI_SUCCESS; + } + + /* Set my fan-out flag */ + sm_address=(mca_coll_sm2_nb_request_process_shared_mem_t *) + ((char *)sm_barrier_region+ + sm_module->barrier_tree.my_rank* + sm_module->segement_size_per_process); + sm_address->flag=-tag; + + /* + * set my completion flag + */ + request->sm2_barrier_phase=NB_BARRIER_DONE; + } - - sm_address=(mca_coll_sm2_nb_request_process_shared_mem_t *) - ((char *)sm_barrier_region+ - sm_module->barrier_tree.my_rank * - sm_module->segement_size_per_process); - sm_address->flag=-tag; - - /* - * set my completion flag - */ - request->sm2_barrier_phase=NB_BARRIER_DONE; - - /* - * set barrier completion flag - */ - /* return - successful completion */ return OMPI_SUCCESS; } @@ -188,63 +270,27 @@ int mca_coll_sm2_nbbarrier_intra_progress(struct ompi_communicator_t *comm, sm_module=(mca_coll_sm2_module_t *)module; tag=request->tag; - phase=request->sm2_barrier_phase; - if( NB_BARRIER_FAN_OUT == phase ) { - goto FANOUT; - } else if ( (NB_BARRIER_DONE == phase) || (NB_BARRIER_INACTIVE == phase) ) { - goto DONE; - } - /* defult - NB_BARRIER_FAN_IN */ - - /* - * Fan-in phase - */ - - /* check to see if children have checked in */ - cnt=0; - for( child=0 ; child < sm_module->barrier_tree.n_children ; child++ ) { - /* compute flag address */ - sm_address=(mca_coll_sm2_nb_request_process_shared_mem_t *) - ((char *)sm_barrier_region+ - sm_module->barrier_tree.children_ranks[child] * - sm_module->segement_size_per_process); - if(sm_address->flag == tag ) { - /* child arrived */ - cnt++; - } else { - /* child not arrived, just break out */ - break; + if( LEAF_NODE == sm_module->barrier_tree.my_node_type ) { + phase=request->sm2_barrier_phase; + if( NB_BARRIER_FAN_OUT == phase ) { + goto FANOUT_LEAF; + } else if ( (NB_BARRIER_DONE == phase) || (NB_BARRIER_INACTIVE == phase) ) { + goto DONE; } - } - - /* if children have not checked in - set paramenters for async - * completion, incomplet barrier flag, and bail - */ - if( cnt != sm_module->barrier_tree.n_children ) { - /* set restart parameters, and exit */ - request->sm2_barrier_phase=NB_BARRIER_FAN_IN; - return OMPI_SUCCESS; - } - - /* Set my completion flag */ - sm_address=(mca_coll_sm2_nb_request_process_shared_mem_t *) - ((char *)sm_barrier_region+ - sm_module->barrier_tree.my_rank * - sm_module->segement_size_per_process); - sm_address->flag=tag; - /* don't need memory barrier here, as we are not setting any other sm - * data for someone else to read - */ - -FANOUT: - /* - * Fan-out phase - */ - - /* - * check to see if parent has checked in - */ - if(sm_module->barrier_tree.n_parents > 0 ) { + /* defult - NB_BARRIER_FAN_IN */ + + /* + * Fan-in phase + */ + + FANOUT_LEAF: + /* + * Fan-out phase + */ + + /* + * check to see if parent has checked in + */ sm_address=(mca_coll_sm2_nb_request_process_shared_mem_t *) ((char *)sm_barrier_region+ sm_module->barrier_tree.parent_rank* @@ -256,23 +302,141 @@ FANOUT: request->sm2_barrier_phase=NB_BARRIER_FAN_OUT; return OMPI_SUCCESS; } + + /* + * set my completion flag + */ + request->sm2_barrier_phase=NB_BARRIER_DONE; + } else if( INTERIOR_NODE == sm_module->barrier_tree.my_node_type ) { + phase=request->sm2_barrier_phase; + if( NB_BARRIER_FAN_OUT == phase ) { + goto FANOUT_INTERIOR; + } else if ( (NB_BARRIER_DONE == phase) || (NB_BARRIER_INACTIVE == phase) ) { + goto DONE; + } + /* defult - NB_BARRIER_FAN_IN */ + + /* + * Fan-in phase + */ + + /* check to see if children have checked in */ + cnt=0; + for( child=0 ; child < sm_module->barrier_tree.n_children ; child++ ) { + /* compute flag address */ + sm_address=(mca_coll_sm2_nb_request_process_shared_mem_t *) + ((char *)sm_barrier_region+ + sm_module->barrier_tree.children_ranks[child] * + sm_module->segement_size_per_process); + if(sm_address->flag == tag ) { + /* child arrived */ + cnt++; + } else { + /* child not arrived, just break out */ + break; + } + } + + /* if children have not checked in - set paramenters for async + * completion, incomplet barrier flag, and bail + */ + if( cnt != sm_module->barrier_tree.n_children ) { + /* set restart parameters, and exit */ + request->sm2_barrier_phase=NB_BARRIER_FAN_IN; + return OMPI_SUCCESS; + } + + /* Set my completion flag */ + sm_address=(mca_coll_sm2_nb_request_process_shared_mem_t *) + ((char *)sm_barrier_region+ + sm_module->barrier_tree.my_rank * + sm_module->segement_size_per_process); + sm_address->flag=tag; + /* don't need memory barrier here, as we are not setting any other sm + * data for someone else to read + */ + + FANOUT_INTERIOR: + /* + * Fan-out phase + */ + + /* + * check to see if parent has checked in + */ + sm_address=(mca_coll_sm2_nb_request_process_shared_mem_t *) + ((char *)sm_barrier_region+ + sm_module->barrier_tree.parent_rank* + sm_module->segement_size_per_process); + if( sm_address->flag != -tag ) { + /* if parent has not checked in - set parameters for async + * completion, incomplet barrier flag, and bail + */ + request->sm2_barrier_phase=NB_BARRIER_FAN_OUT; + return OMPI_SUCCESS; + } + + sm_address=(mca_coll_sm2_nb_request_process_shared_mem_t *) + ((char *)sm_barrier_region+ + sm_module->barrier_tree.my_rank * + sm_module->segement_size_per_process); + sm_address->flag=-tag; + + /* + * set my completion flag + */ + request->sm2_barrier_phase=NB_BARRIER_DONE; + } else { + /* root node */ + phase=request->sm2_barrier_phase; + if ( (NB_BARRIER_DONE == phase) || (NB_BARRIER_INACTIVE == phase) ) { + goto DONE; + } + /* defult - NB_BARRIER_FAN_IN */ + + /* + * Fan-in phase + */ + + /* check to see if children have checked in */ + cnt=0; + for( child=0 ; child < sm_module->barrier_tree.n_children ; child++ ) { + /* compute flag address */ + sm_address=(mca_coll_sm2_nb_request_process_shared_mem_t *) + ((char *)sm_barrier_region+ + sm_module->barrier_tree.children_ranks[child] * + sm_module->segement_size_per_process); + if(sm_address->flag == tag ) { + /* child arrived */ + cnt++; + } else { + /* child not arrived, just break out */ + break; + } + } + + /* if children have not checked in - set paramenters for async + * completion, incomplet barrier flag, and bail + */ + if( cnt != sm_module->barrier_tree.n_children ) { + /* set restart parameters, and exit */ + request->sm2_barrier_phase=NB_BARRIER_FAN_IN; + return OMPI_SUCCESS; + } + + /* Set my completion flag */ + sm_address=(mca_coll_sm2_nb_request_process_shared_mem_t *) + ((char *)sm_barrier_region+ + sm_module->barrier_tree.my_rank * + sm_module->segement_size_per_process); + sm_address->flag=-tag; + + /* + * set my completion flag + */ + request->sm2_barrier_phase=NB_BARRIER_DONE; } - - sm_address=(mca_coll_sm2_nb_request_process_shared_mem_t *) - ((char *)sm_barrier_region+ - sm_module->barrier_tree.my_rank * - sm_module->segement_size_per_process); - sm_address->flag=-tag; - - /* - * set my completion flag - */ - request->sm2_barrier_phase=NB_BARRIER_DONE; - - /* - * set barrier completion flag - */ - + DONE: /* return - successful completion */ return OMPI_SUCCESS; diff --git a/ompi/mca/coll/sm2/coll_sm2_module.c b/ompi/mca/coll/sm2/coll_sm2_module.c index bb009e0daa..7cdb66bdd1 100644 --- a/ompi/mca/coll/sm2/coll_sm2_module.c +++ b/ompi/mca/coll/sm2/coll_sm2_module.c @@ -336,6 +336,15 @@ static int setup_nary_tree(int tree_order, int my_rank, int num_nodes, } } } + /* set node type */ + if( 0 == my_node->n_parents ) { + my_node->my_node_type=ROOT_NODE; + } else if ( 0 == my_node->n_children ) { + my_node->my_node_type=LEAF_NODE; + } else { + my_node->my_node_type=INTERIOR_NODE; + } + /* successful return */ return OMPI_SUCCESS;