From 64321acc2245ce7497ffc190e494360a8c55ae98 Mon Sep 17 00:00:00 2001 From: Nathan Hjelm Date: Mon, 3 Feb 2014 17:01:57 +0000 Subject: [PATCH] basesmuma: do not call MB directly. OPAL does not always define MB. It is recommended that opal_atomic_[rw]mb be called instead. We will need to address the cases where these functions are no-ops on weakly ordered CPUs. cmr=v1.7.5:ticket=trac:4158 This commit was SVN r30534. The following Trac tickets were found above: Ticket 4158 --> https://svn.open-mpi.org/trac/ompi/ticket/4158 --- .../bcol/basesmuma/bcol_basesmuma_allgather.c | 8 ++--- .../bcol/basesmuma/bcol_basesmuma_allreduce.c | 6 ++-- .../mca/bcol/basesmuma/bcol_basesmuma_bcast.c | 6 ++-- .../basesmuma/bcol_basesmuma_bcast_prime.c | 22 ++++++------- .../bcol/basesmuma/bcol_basesmuma_gather.c | 6 ++-- .../basesmuma/bcol_basesmuma_lmsg_bcast.c | 32 +++++++++---------- .../bcol_basesmuma_lmsg_knomial_bcast.c | 4 ++- .../basesmuma/bcol_basesmuma_rd_barrier.c | 4 +-- .../basesmuma/bcol_basesmuma_rd_nb_barrier.c | 4 ++- 9 files changed, 49 insertions(+), 43 deletions(-) diff --git a/ompi/mca/bcol/basesmuma/bcol_basesmuma_allgather.c b/ompi/mca/bcol/basesmuma/bcol_basesmuma_allgather.c index 57dc43c9e2..676be0a9d1 100644 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma_allgather.c +++ b/ompi/mca/bcol/basesmuma/bcol_basesmuma_allgather.c @@ -207,7 +207,7 @@ MAIN_PHASE: /* we start the recursive k - ing phase */ for( *iteration = 0; *iteration < pow_k; (*iteration)++) { /* announce my arrival */ - MB(); + opal_atomic_wmb (); my_ctl_pointer->flags[ALLGATHER_FLAG][bcol_id] = ready_flag; /* calculate the number of active requests */ CALC_ACTIVE_REQUESTS(active_requests,exchange_node->rank_exchanges[*iteration],tree_order); @@ -271,7 +271,7 @@ MAIN_PHASE: /* finish off the last piece, send the data back to the extra */ if( 0 < exchange_node->n_extra_sources ) { /* simply announce my arrival */ - MB(); + opal_atomic_wmb (); my_ctl_pointer->flags[ALLGATHER_FLAG][bcol_id] = 
ready_flag; } @@ -433,7 +433,7 @@ MAIN_PHASE: /* start the recursive k - ing phase */ for( *iter=*iteration; *iter < pow_k; (*iter)++) { /* I am ready at this level */ - MB(); + opal_atomic_wmb (); my_ctl_pointer->flags[ALLGATHER_FLAG][bcol_id] = ready_flag; if( 0 == *active_requests ) { /* flip some bits, if we don't have active requests from a previous visit */ @@ -499,7 +499,7 @@ MAIN_PHASE: /* finish off the last piece, send the data back to the extra */ if( 0 < exchange_node->n_extra_sources ) { /* simply announce my arrival */ - MB(); + opal_atomic_wmb (); my_ctl_pointer->flags[ALLGATHER_FLAG][bcol_id] = ready_flag; } diff --git a/ompi/mca/bcol/basesmuma/bcol_basesmuma_allreduce.c b/ompi/mca/bcol/basesmuma/bcol_basesmuma_allreduce.c index ce3890563e..48cecaeb42 100644 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma_allreduce.c +++ b/ompi/mca/bcol/basesmuma/bcol_basesmuma_allreduce.c @@ -2,7 +2,7 @@ /* * Copyright (c) 2009-2013 Oak Ridge National Laboratory. All rights reserved. * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2013 Los Alamos National Security, LLC. All rights + * Copyright (c) 2013-2014 Los Alamos National Security, LLC. All rights * reserved. 
* $COPYRIGHT$ * @@ -409,7 +409,7 @@ int bcol_basesmuma_allreduce_intra_recursive_doubling(bcol_function_args_t *inpu } /* signal that I have arrived */ - /* MB(); */ + /* opal_atomic_wmb (); */ my_ctl_pointer->sequence_number = sequence_number; /* If we use this buffer more than once by an sm module in @@ -581,7 +581,7 @@ int bcol_basesmuma_allreduce_intra_recursive_doubling(bcol_function_args_t *inpu /* * Signal parent that data is ready */ - MB(); + opal_atomic_wmb (); /*my_ctl_pointer->flags[ALLREDUCE_FLAG] = ready_flag;*/ my_ctl_pointer->flag = ready_flag; diff --git a/ompi/mca/bcol/basesmuma/bcol_basesmuma_bcast.c b/ompi/mca/bcol/basesmuma/bcol_basesmuma_bcast.c index 28633548cf..a87e9bde96 100644 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma_bcast.c +++ b/ompi/mca/bcol/basesmuma/bcol_basesmuma_bcast.c @@ -1,6 +1,8 @@ /* * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. + * Copyright (c) 2014 Los Alamos National Security, LLC. All rights + * reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -236,7 +238,7 @@ int bcol_basesmuma_bcast(bcol_function_args_t *input_args, memcpy(data_addr, (void *)parent_data_pointer,pack_len); /* Signal to children that they may read the data from my shared buffer */ - MB(); + opal_atomic_wmb (); my_ctl_pointer->flags[BCAST_FLAG][bcol_id] = ready_flag; } @@ -438,7 +440,7 @@ int bcol_basesmuma_hdl_zerocopy_bcast(bcol_function_args_t *input_args, } /* Signal to children that they may read the data from my shared buffer */ - MB(); + opal_atomic_wmb (); hdl_desc->des_src = hdl_seg; hdl_desc->des_src_cnt = 1; for (ridx = 0; ridx < my_fanout_read_tree->n_children; ridx++) { diff --git a/ompi/mca/bcol/basesmuma/bcol_basesmuma_bcast_prime.c b/ompi/mca/bcol/basesmuma/bcol_basesmuma_bcast_prime.c index 12d26fe7b0..b843e55e61 100644 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma_bcast_prime.c +++ b/ompi/mca/bcol/basesmuma/bcol_basesmuma_bcast_prime.c @@ -165,7 +165,7 @@ int bcol_basesmuma_bcast_k_nomial_knownroot(bcol_function_args_t *input_args, } else { ready_flag = my_ctl_pointer->flags[BCAST_FLAG][bcol_id]; } - MB(); + opal_atomic_wmb (); my_ctl_pointer->sequence_number = sequence_number; */ @@ -178,7 +178,7 @@ int bcol_basesmuma_bcast_k_nomial_knownroot(bcol_function_args_t *input_args, /* * signal ready flag */ - MB(); + opal_atomic_wmb (); my_ctl_pointer->flags[BCAST_FLAG][bcol_id] = ready_flag; /* root is finished */ @@ -214,7 +214,7 @@ int bcol_basesmuma_bcast_k_nomial_knownroot(bcol_function_args_t *input_args, /* copy the data */ memcpy(data_addr, (void *) parent_data_pointer, pack_len); /* set the memory barrier to ensure completion */ - MB(); + opal_atomic_wmb (); /* signal that I am done */ my_ctl_pointer->flags[BCAST_FLAG][bcol_id] = ready_flag; @@ -312,7 +312,7 @@ int bcol_basesmuma_bcast_k_nomial_anyroot(bcol_function_args_t *input_args, * set the radix_mask */ radix_mask = pow_k_group_size; /* send to children */ - MB(); + opal_atomic_wmb (); 
BASESMUMA_K_NOMIAL_SEND_CHILDREN(radix_mask, radix,0, my_rank,group_size, ready_flag); @@ -347,7 +347,7 @@ int bcol_basesmuma_bcast_k_nomial_anyroot(bcol_function_args_t *input_args, radix_mask /= radix; /* send to children */ - MB(); + opal_atomic_wmb (); BASESMUMA_K_NOMIAL_SEND_CHILDREN(radix_mask, radix, relative_rank, my_rank, group_size, ready_flag); @@ -515,7 +515,7 @@ int bcol_basesmuma_binary_scatter_allgather_segment(bcol_function_args_t *input_ /* important that these be set before my children * see the ready flag raised */ - MB(); + opal_atomic_wmb (); my_ctl_pointer->flag = ready_flag; /* root is finished */ @@ -638,7 +638,7 @@ int bcol_basesmuma_binary_scatter_allgather_segment(bcol_function_args_t *input_ my_ctl_pointer->n_sends = parent_ctl_pointer->n_sends; /* set the memory barrier */ - MB(); + opal_atomic_wmb (); /* fire the ready flag */ my_ctl_pointer->flag = ready_flag; @@ -696,7 +696,7 @@ int bcol_basesmuma_binary_scatter_allgather_segment(bcol_function_args_t *input_ (size_t)length); } /* set the memory barrier to ensure completion */ - MB(); + opal_atomic_wmb (); /* signal that I am done */ my_ctl_pointer->flag = ready_flag; /* set my status */ @@ -746,14 +746,14 @@ int bcol_basesmuma_binary_scatter_allgather_segment(bcol_function_args_t *input_ local_offset = my_ctl_pointer->offset_zip; /* compute the correct length */ length = length*(1<<(start - 1)); - /* careful! skip over the MB() to avoid the + /* careful! 
skip over the opal_atomic_wmb () to avoid the * cost on every re-entry */ goto Loop; } - MB(); + opal_atomic_wmb (); /* I am ready, set the flag */ my_ctl_pointer->flag = ready_flag; @@ -813,7 +813,7 @@ int bcol_basesmuma_binary_scatter_allgather_segment(bcol_function_args_t *input_ /* bump the ready flag */ ready_flag++; /* ensure completion */ - MB(); + opal_atomic_wmb (); /* fire the flag for the next level */ my_ctl_pointer->flag = ready_flag; diff --git a/ompi/mca/bcol/basesmuma/bcol_basesmuma_gather.c b/ompi/mca/bcol/basesmuma/bcol_basesmuma_gather.c index d4166f5b13..b9d4a39580 100644 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma_gather.c +++ b/ompi/mca/bcol/basesmuma/bcol_basesmuma_gather.c @@ -2,7 +2,7 @@ /* * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2013 Los Alamos National Security, LLC. All rights + * Copyright (c) 2013-2014 Los Alamos National Security, LLC. All rights * reserved. 
* $COPYRIGHT$ * @@ -698,7 +698,7 @@ int bcol_basesmuma_k_nomial_gather(bcol_function_args_t *input_args, exchange_node->rank_exchanges[i][j], buff_offset + exchange_node->payload_info[i][j].r_offset, exchange_node->payload_info[i][j].r_len*pack_len); */ - MB(); + opal_atomic_wmb (); knt++; if(knt == exchange_node->n_actual_exchanges) { /* this is the trick to break the root out, @@ -1063,7 +1063,7 @@ int bcol_basesmuma_k_nomial_gather(bcol_function_args_t *input_args, fprintf(stderr,"hello n_actual_exch is %d \n", exchange_node->n_actual_exchanges); */ - MB(); + opal_atomic_wmb (); my_ctl_pointer->gflag = ready_flag; goto LAST_STEP; diff --git a/ompi/mca/bcol/basesmuma/bcol_basesmuma_lmsg_bcast.c b/ompi/mca/bcol/basesmuma/bcol_basesmuma_lmsg_bcast.c index 8510258eca..3cd0b0f814 100644 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma_lmsg_bcast.c +++ b/ompi/mca/bcol/basesmuma/bcol_basesmuma_lmsg_bcast.c @@ -215,7 +215,7 @@ int bcol_basesmuma_lmsg_scatter_allgather_portals_bcast_old(bcol_function_args_t /* important that these be set before my children * see the ready flag raised */ - MB(); + opal_atomic_wmb (); my_ctl_pointer->flag = ready_flag; /* Wait for my scatter partner */ @@ -342,7 +342,7 @@ Probe: /* important that these be set before my children * see the ready flag raised */ - MB(); + opal_atomic_wmb (); my_ctl_pointer->flag = ready_flag; wait_for_peers(my_rank, my_lmsg_ctl_pointer->n_sends, data_buffs, @@ -415,7 +415,7 @@ Probe: msg_posted = true; /* set the memory barrier to ensure completion * and signal I am done getting scatter data*/ - MB(); + opal_atomic_wmb (); my_ctl_pointer->flag = ready_flag; wait_for_peers(my_rank, my_lmsg_ctl_pointer->n_sends, data_buffs, @@ -441,7 +441,7 @@ Probe: remote_offset, length); /* signal that I am done reading data from parent */ - MB(); + opal_atomic_wmb (); my_ctl_pointer->flag = ready_flag; } @@ -499,7 +499,7 @@ Allgather: ready_flag++; - MB(); + opal_atomic_wmb (); my_ctl_pointer->flag = ready_flag; /* @@ -533,7 
+533,7 @@ Allgather: local_sg_offset, length); ready_flag++; - MB(); + opal_atomic_wmb (); my_ctl_pointer->flag = ready_flag; /* Block until partner completed this level of recursive-doubling stage */ @@ -753,7 +753,7 @@ int bcol_basesmuma_lmsg_scatter_allgather_portals_bcast(bcol_function_args_t *in /* important that these be set before my children * see the ready flag raised */ - MB(); + opal_atomic_wmb (); my_ctl_pointer->flags[BCAST_FLAGS] = ready_flag; BASESMUMA_VERBOSE(1,("I am the root(ctl_pointer %x) of the data flag value %d",my_ctl_pointer, my_ctl_pointer->flag)); /* Wait for my scatter partner */ @@ -879,7 +879,7 @@ Probe: /* important that these be set before my children * see the ready flag raised */ - MB(); + opal_atomic_wmb (); my_ctl_pointer->flags[BCAST_FLAG] = ready_flag; wait_for_peers_nb(my_rank, my_ctl_pointer->n_sends, ctl_structs, @@ -949,7 +949,7 @@ Probe: ); msg_posted = true; /* set the memory barrier to ensure completion */ - MB(); + opal_atomic_wmb (); /* signal that I am done */ my_ctl_pointer->flags[BCAST_FLAG] = ready_flag; @@ -977,7 +977,7 @@ Probe: remote_offset, length); /* signal that I am done reading data from parent */ - MB(); + opal_atomic_wmb (); my_ctl_pointer->flags[BCAST_FLAG] = ready_flag; } @@ -1034,7 +1034,7 @@ Allgather: } ready_flag++; - MB(); + opal_atomic_wmb (); my_ctl_pointer->flags[BCAST_FLAG] = ready_flag; for( i = 1; i < pow_2_levels; i++) { @@ -1064,7 +1064,7 @@ Allgather: local_sg_offset, length); ready_flag++; - MB(); + opal_atomic_wmb (); my_ctl_pointer->flags[BCAST_FLAG] = ready_flag; /* Block until partner is at this level of recursive-doubling stage */ @@ -1257,7 +1257,7 @@ Extra : rc = sm_portals_root_scatter(sg_state); /* gvm Fix: Redudant - MB(); + opal_atomic_wmb (); */ sg_state->my_ctl_pointer->flag = sg_state->ready_flag; @@ -1433,7 +1433,7 @@ Scatter_parent_wait: /* signal that I am done reading data from parent */ /* - MB(); + opal_atomic_wmb (); */ sg_state->my_ctl_pointer->flag = 
sg_state->ready_flag; } @@ -1487,7 +1487,7 @@ Allgather: BASESMUMA_VERBOSE(5,("Done with allgather phase")); /* I reached an allgather phase */ sg_state->ready_flag++; - MB(); + opal_atomic_wmb (); sg_state->my_ctl_pointer->flag = sg_state->ready_flag; rc = sm_portals_bcasts_allgather_phase(sg_state); @@ -1840,7 +1840,7 @@ Allgather: } sg_state->ready_flag++; - MB(); + opal_atomic_wmb (); sg_state->my_ctl_pointer->flag = sg_state->ready_flag; rc = sm_portals_bcasts_allgather_phase(sg_state); diff --git a/ompi/mca/bcol/basesmuma/bcol_basesmuma_lmsg_knomial_bcast.c b/ompi/mca/bcol/basesmuma/bcol_basesmuma_lmsg_knomial_bcast.c index af9315bc39..f3c6d16c2b 100644 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma_lmsg_knomial_bcast.c +++ b/ompi/mca/bcol/basesmuma/bcol_basesmuma_lmsg_knomial_bcast.c @@ -1,6 +1,8 @@ /* * Copyright (c) 2009-2012 Oak Ridge National Laboratory. All rights reserved. * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. + * Copyright (c) 2014 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -214,7 +216,7 @@ int bcol_basesmuma_lmsg_bcast_k_nomial_anyroot(bcol_function_args_t *input_args, mca_bcol_basesmuma_portals_get_msg(cs, parent_lmsg_ctl_pointer, userbuf, pack_len); /* set the memory barrier to ensure completion */ - MB(); + opal_atomic_wmb (); /* signal that I am done */ my_ctl_pointer->flag = ready_flag; diff --git a/ompi/mca/bcol/basesmuma/bcol_basesmuma_rd_barrier.c b/ompi/mca/bcol/basesmuma/bcol_basesmuma_rd_barrier.c index 4b95fec384..9adba7b58e 100644 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma_rd_barrier.c +++ b/ompi/mca/bcol/basesmuma/bcol_basesmuma_rd_barrier.c @@ -1,7 +1,7 @@ /* * Copyright (c) 2009-2013 Oak Ridge National Laboratory. All rights reserved. * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. - * Copyright (c) 2013 Los Alamos National Security, LLC. All rights + * Copyright (c) 2013-2014 Los Alamos National Security, LLC. 
All rights * reserved. * $COPYRIGHT$ * @@ -91,7 +91,7 @@ int bcol_basesmuma_recursive_double_barrier(bcol_function_args_t *input_args, my_ctl->flag = -1; /* don't need to set this flag anymore */ my_ctl->sequence_number = sequence_number; - /* MB();*/ + /* opal_atomic_wmb ();*/ if(0 < my_exchange_node->n_extra_sources) { if (EXCHANGE_NODE == my_exchange_node->node_type) { diff --git a/ompi/mca/bcol/basesmuma/bcol_basesmuma_rd_nb_barrier.c b/ompi/mca/bcol/basesmuma/bcol_basesmuma_rd_nb_barrier.c index 4cb66f3cd8..22e4ffb7cc 100644 --- a/ompi/mca/bcol/basesmuma/bcol_basesmuma_rd_nb_barrier.c +++ b/ompi/mca/bcol/basesmuma/bcol_basesmuma_rd_nb_barrier.c @@ -2,6 +2,8 @@ * Copyright (c) 2009-2012 UT-Battelle, LLC. All rights reserved. * Copyright (c) 2009-2012 Mellanox Technologies. All rights reserved. * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2014 Los Alamos National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -72,7 +74,7 @@ int bcol_basesmuma_rd_nb_barrier_init_admin( /* signal that I have arrived */ my_ctl->flag = -1; - MB(); + opal_atomic_wmb (); /* don't need to set this flag anymore */ my_ctl->sequence_number = bank_genaration;