1
1

- Remove all vestiges of using the built-in mcb_tree from the
  reduce_inorder() function -- we don't use the tree at all.
- Add more relevant "volatile" qualifiers for the control buffers in the
  fragment mpool (and associated casts where necessary)

This commit was SVN r7616.
Этот коммит содержится в:
Jeff Squyres 2005-10-04 14:52:59 +00:00
родитель 9a67831ba3
Коммит b17c4334c4
3 изменённых файлов: 14 добавлений и 18 удалений

Просмотреть файл

@ -242,7 +242,7 @@ extern "C" {
*/ */
struct mca_coll_base_mpool_index_t { struct mca_coll_base_mpool_index_t {
/** Pointer to beginning of control data */ /** Pointer to beginning of control data */
uint32_t *mcbmi_control; uint32_t volatile *mcbmi_control;
/** Pointer to beginning of message fragment data */ /** Pointer to beginning of message fragment data */
char *mcbmi_data; char *mcbmi_data;
}; };
@ -514,7 +514,7 @@ extern int32_t mca_coll_sm_bogus_free_after;
*/ */
#define CHILD_WAIT_FOR_NOTIFY(rank, index, value) \ #define CHILD_WAIT_FOR_NOTIFY(rank, index, value) \
do { \ do { \
volatile uint32_t *ptr = ((uint32_t*) \ uint32_t volatile *ptr = ((uint32_t*) \
(((char*) index->mcbmi_control) + \ (((char*) index->mcbmi_control) + \
((rank) * mca_coll_sm_component.sm_control_size))); \ ((rank) * mca_coll_sm_component.sm_control_size))); \
while (0 == *ptr) SPIN; \ while (0 == *ptr) SPIN; \
@ -527,7 +527,7 @@ extern int32_t mca_coll_sm_bogus_free_after;
* segment. Used for fan in operations. * segment. Used for fan in operations.
*/ */
#define CHILD_NOTIFY_PARENT(child_rank, parent_rank, index, value) \ #define CHILD_NOTIFY_PARENT(child_rank, parent_rank, index, value) \
((size_t*) \ ((size_t volatile *) \
(((char*) (index)->mcbmi_control) + \ (((char*) (index)->mcbmi_control) + \
(mca_coll_sm_component.sm_control_size * \ (mca_coll_sm_component.sm_control_size * \
(parent_rank))))[(child_rank)] = (value) (parent_rank))))[(child_rank)] = (value)
@ -539,7 +539,7 @@ extern int32_t mca_coll_sm_bogus_free_after;
*/ */
#define PARENT_WAIT_FOR_NOTIFY_SPECIFIC(child_rank, parent_rank, index, value) \ #define PARENT_WAIT_FOR_NOTIFY_SPECIFIC(child_rank, parent_rank, index, value) \
do { \ do { \
volatile size_t *ptr = ((size_t *) \ size_t volatile *ptr = ((size_t volatile *) \
(((char*) index->mcbmi_control) + \ (((char*) index->mcbmi_control) + \
(mca_coll_sm_component.sm_control_size * \ (mca_coll_sm_component.sm_control_size * \
(parent_rank)))) + child_rank; \ (parent_rank)))) + child_rank; \

Просмотреть файл

@ -405,9 +405,9 @@ sm_module_init(struct ompi_communicator_t *comm)
/* Memory affinity: control */ /* Memory affinity: control */
maffinity[j].mbs_len = c->sm_control_size; maffinity[j].mbs_len = c->sm_control_size;
maffinity[j].mbs_start_addr = maffinity[j].mbs_start_addr = (void *)
data->mcb_mpool_index[i].mcbmi_control + (data->mcb_mpool_index[i].mcbmi_control +
(rank * c->sm_control_size); (rank * c->sm_control_size));
++j; ++j;
/* Memory affinity: data */ /* Memory affinity: data */
@ -430,7 +430,7 @@ sm_module_init(struct ompi_communicator_t *comm)
memset(data->mcb_barrier_control_me, 0, memset(data->mcb_barrier_control_me, 0,
num_barrier_buffers * 2 * c->sm_control_size); num_barrier_buffers * 2 * c->sm_control_size);
for (i = 0; i < c->sm_comm_num_segments; ++i) { for (i = 0; i < c->sm_comm_num_segments; ++i) {
memset(data->mcb_mpool_index[i].mcbmi_control, 0, memset((void *) data->mcb_mpool_index[i].mcbmi_control, 0,
c->sm_control_size); c->sm_control_size);
} }

Просмотреть файл

@ -147,7 +147,6 @@ static int reduce_inorder(void *sbuf, void* rbuf, int count,
mca_coll_sm_in_use_flag_t *flag; mca_coll_sm_in_use_flag_t *flag;
ompi_convertor_t convertor; ompi_convertor_t convertor;
mca_coll_base_mpool_index_t *index; mca_coll_base_mpool_index_t *index;
mca_coll_sm_tree_node_t *me;
int32_t ddt_size; int32_t ddt_size;
size_t segment_ddt_count, segment_ddt_bytes, zero = 0; size_t segment_ddt_count, segment_ddt_bytes, zero = 0;
@ -156,8 +155,6 @@ static int reduce_inorder(void *sbuf, void* rbuf, int count,
rank = ompi_comm_rank(comm); rank = ompi_comm_rank(comm);
size = ompi_comm_size(comm); size = ompi_comm_size(comm);
me = &data->mcb_tree[(rank + size - root) % size];
/* Figure out how much we should have the convertor copy. We need /* Figure out how much we should have the convertor copy. We need
to have it be in units of a datatype -- i.e., we only want to to have it be in units of a datatype -- i.e., we only want to
copy a whole datatype worth of data or none at all (we've copy a whole datatype worth of data or none at all (we've
@ -255,10 +252,8 @@ static int reduce_inorder(void *sbuf, void* rbuf, int count,
/* Main loop over receiving / reducing fragments */ /* Main loop over receiving / reducing fragments */
do { do {
flag_num = (data->mcb_operation_count % flag_num = (data->mcb_operation_count %
mca_coll_sm_component.sm_comm_num_in_use_flags); mca_coll_sm_component.sm_comm_num_in_use_flags);
FLAG_SETUP(flag_num, flag, data); FLAG_SETUP(flag_num, flag, data);
FLAG_WAIT_FOR_IDLE(flag); FLAG_WAIT_FOR_IDLE(flag);
FLAG_RETAIN(flag, size, data->mcb_operation_count); FLAG_RETAIN(flag, size, data->mcb_operation_count);
@ -417,7 +412,6 @@ static int reduce_inorder(void *sbuf, void* rbuf, int count,
*********************************************************************/ *********************************************************************/
else { else {
int parent_rank = (me->mcstn_parent->mcstn_id + root) % size;
/* Here we get a convertor for the full count that the user /* Here we get a convertor for the full count that the user
provided (as opposed to the convertor that the root got) */ provided (as opposed to the convertor that the root got) */
@ -454,7 +448,7 @@ static int reduce_inorder(void *sbuf, void* rbuf, int count,
do { do {
index = &(data->mcb_mpool_index[segment_num]); index = &(data->mcb_mpool_index[segment_num]);
/* Copy from the user's buffer to the shared mem /* Copy from the user's buffer to my shared mem
segment */ segment */
COPY_FRAGMENT_IN(convertor, index, rank, iov, max_data); COPY_FRAGMENT_IN(convertor, index, rank, iov, max_data);
bytes += max_data; bytes += max_data;
@ -462,8 +456,10 @@ static int reduce_inorder(void *sbuf, void* rbuf, int count,
/* Wait for the write to absolutely complete */ /* Wait for the write to absolutely complete */
opal_atomic_wmb(); opal_atomic_wmb();
/* Tell my parent that this fragment is ready */ /* Tell my parent (always the reduction root -- we're
CHILD_NOTIFY_PARENT(rank, parent_rank, index, max_data); ignoring the mcb_tree parent/child relationships
here) that this fragment is ready */
CHILD_NOTIFY_PARENT(rank, root, index, max_data);
++segment_num; ++segment_num;
} while (bytes < total_size && segment_num < max_segment_num); } while (bytes < total_size && segment_num < max_segment_num);