1
1
openmpi/ompi/mca/coll/sm/coll_sm_barrier.c
Jeff Squyres 31065f1cc0 First cut of sm coll component infrastructure (this is what took so
much time) and somewhat-lame implementation of barrier (need to
precompute some more stuff rather than calculate it every time).
Checkpointing so I can try this on another machine...

This commit was SVN r6985.
2005-08-23 21:22:00 +00:00

145 строки
4.7 KiB
C

/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "ompi/include/constants.h"
#include "ompi/communicator/communicator.h"
#include "ompi/mca/coll/coll.h"
#include "opal/include/sys/atomic.h"
#include "coll_sm.h"
/* Debug tracing helper.  D() takes one parenthesized printf argument
   list, e.g. D(("rank %d\n", rank)).  As written (#if 0) it expands to
   nothing; change the condition to "#if 1" to make every D(...) expand
   to a printf call. */
#if 0
#define D(foo) printf foo
#else
#define D(foo)
#endif
/*
 * barrier
 *
 * Function: - barrier implemented as a fan-in followed by a fan-out
 *             over a tree of per-rank control words
 * Accepts:  - same as MPI_Barrier()
 * Returns:  - OMPI_SUCCESS (this implementation has no error paths)
 *
 * Tree shape: with degree = mca_coll_sm_component.sm_tree_degree, the
 * children of rank r are ranks (r * degree + 1) .. (r * degree + degree),
 * clipped to the communicator size, and the parent of a non-root rank r
 * is (r - 1) / degree.
 *
 * Each call advances data->mcb_operation_count and uses the control
 * areas of segment (count % data->mcb_mpool_num_segments).  Within a
 * control area, rank r owns the word at byte offset
 * r * mca_coll_sm_component.sm_control_size.
 *
 * Protocol:
 *   fan in:  every child atomically increments its parent's "in" word;
 *            a parent spins until that word equals its child count,
 *            then resets it to 0.
 *   fan out: every non-root rank spins on its own "out" word until its
 *            parent stores 1 there, then stores 1 into the "out" word
 *            of each of its own children.
 */
int mca_coll_sm_barrier_intra(struct ompi_communicator_t *comm)
{
    mca_coll_base_comm_t *data = comm->c_coll_selected_data;
    /* These words are written by other ranks while we poll them, so
       every access must really go to memory. */
    volatile uint32_t *my_control_in, *my_control_out;
    uint32_t *parent_control_in = NULL;
    int i, rank, size, degree, first_child, num_children, segment;
    int parent = 0;
    char *control_in, *control_out;

    segment = (++data->mcb_operation_count % data->mcb_mpool_num_segments);
    control_in = data->mcb_mpool_index[segment]->mcbmi_control_fan_in;
    control_out = data->mcb_mpool_index[segment]->mcbmi_control_fan_out;

    /* THIS CAN BE PRECOMPUTED */

    /* Figure out some identities */
    rank = ompi_comm_rank(comm);
    size = ompi_comm_size(comm);
    degree = mca_coll_sm_component.sm_tree_degree;
    first_child = (rank * degree) + 1;

    /* Do we have children?  If so, how many? */
    if (first_child >= size) {
        /* Leaves */
        num_children = 0;
    } else {
        int last_child = (rank * degree) + degree;

        /* The last interior node may have fewer than degree children */
        if (last_child >= size) {
            last_child = size - 1;
        }
        D(("rank %d: min child: %d, max child: %d\n", rank, first_child, last_child));
        num_children = last_child - first_child + 1;
    }

    /* Only non-root ranks have a parent to signal */
    if (0 != rank) {
        parent = (rank - 1) / degree;
        parent_control_in = (uint32_t *)
            (control_in + (parent * mca_coll_sm_component.sm_control_size));
    }

    D(("rank %d: segment %d (opn count: %d), parent %d, num_children = %d\n",
       rank, segment, data->mcb_operation_count, parent, num_children));

    /* Pre-calculate some pointers */
    my_control_in = (volatile uint32_t *)
        (control_in + (rank * mca_coll_sm_component.sm_control_size));
    my_control_out = (volatile uint32_t *)
        (control_out + (rank * mca_coll_sm_component.sm_control_size));

    /* Clear our release flag *before* we report in to our parent; the
       parent only sets it after it has seen our fan-in increment. */
    *my_control_out = 0;

    /* Fan in: wait for my children */
    if (0 != num_children) {
        D(("rank %d waiting for fan in from %d children...\n", rank, num_children));
        /* We are waiting to observe stores made by other ranks, so a
           read barrier (not a write barrier) belongs in the poll loop */
        while (*my_control_in != (uint32_t) num_children) {
            opal_atomic_rmb();
        }
        *my_control_in = 0;
        D(("rank %d got fan in\n", rank));
    }

    /* Fan in: send to my parent */
    if (NULL != parent_control_in) {
        D(("rank %d writing to parent\n", rank));
        /* Make sure the resets above are visible before the increment
           that allows the parent to release us; do not rely on the
           atomic add implying a barrier.  Otherwise a late-arriving
           reset could overwrite the parent's release and we would
           spin forever below. */
        opal_atomic_wmb();
        opal_atomic_add(parent_control_in, 1);
        D(("rank %d wrote to parent: %d\n", rank, *parent_control_in));
    }

    /* Fan out: wait for my parent to write to me (don't poll on
       parent's out buffer -- that would cause a lot of network
       traffic / contention / faults / etc. -- this way, the children
       poll on local memory and therefore only num_children messages
       are sent across the network [vs. num_children *each* time all
       the children poll] -- i.e., the memory is only being polled by
       one process, and it is only changed *once* by an external
       process) */
    if (NULL != parent_control_in) {
        D(("rank %d waiting for fan out from parent\n", rank));
        while (0 == *my_control_out) {
            opal_atomic_rmb();
        }
        D(("rank %d got fan out from parent\n", rank));
    }

    /* Fan out: send to my children */
    for (i = 0; i < num_children; ++i) {
        D(("rank %d writing fan out to child %d, rank %d (start %d, num_children %d)\n", rank, i, first_child + i, first_child, num_children));
        *((volatile uint32_t *)
          (control_out +
           ((first_child + i) * mca_coll_sm_component.sm_control_size))) = 1;
    }
    D(("rank %d done with barrier\n", rank));

    /* All done!  End state of this rank's words in the control segment:
       my_control_in:  0 if this rank has children (reset after fan in);
                       not touched by this rank otherwise
       my_control_out: 1 on non-root ranks (stored by the parent);
                       0 on rank 0, which nobody releases
    */
    return OMPI_SUCCESS;
}