1
1

simplify the bcast code by taking abstract actions and making them

macros -- will help with the other algorithms

This commit was SVN r7214.
Этот коммит содержится в:
Jeff Squyres 2005-09-07 13:33:43 +00:00
родитель 3e002203a0
Коммит 9302f924ea
3 изменённых файлов: 138 добавлений и 123 удалений

Просмотреть файл

@ -284,7 +284,6 @@ extern "C" {
*/
extern mca_coll_sm_component_t mca_coll_sm_component;
/*
* coll module functions
*/
@ -385,4 +384,109 @@ extern "C" {
}
#endif
/**
* Global variables used in the macros (essentially constants, so
* these are thread safe)
*/
extern uint32_t mca_coll_sm_iov_size;
extern int32_t mca_coll_sm_bogus_free_after;
/**
* Macro to setup flag usage
*/
#define FLAG_SETUP(flag_num, flag, data) \
(flag) = (mca_coll_sm_in_use_flag_t*) \
(((char *) (data)->mcb_in_use_flags) + \
((flag_num) * mca_coll_sm_component.sm_control_size));
/**
* Macro to wait for the in-use flag to become idle (used by the root)
*/
#define FLAG_WAIT_FOR_IDLE(flag) \
while (0 != (flag)->mcsiuf_num_procs_using) continue;
/**
* Macro to wait for a flag to indicate that it's ready for this
* operation (used by non-root processes to know when FLAG_SET() has
* been called)
*/
#define FLAG_WAIT_FOR_OP(flag, op) \
while ((op) != flag->mcsiuf_operation_count) continue;
/**
* Macro to set an in-use flag with relevant data to claim it
*/
#define FLAG_RETAIN(flag, num_procs, op_count) \
(flag)->mcsiuf_num_procs_using = (num_procs); \
(flag)->mcsiuf_operation_count = (op_count);
/**
* Macro to release an in-use flag from this process
*/
#define FLAG_RELEASE(flag) \
opal_atomic_add(&(flag)->mcsiuf_num_procs_using, -1);
/**
* Macro to copy a single segment in from a user buffer to a shared
* segment
*/
#define COPY_FRAGMENT_IN(convertor, index, iov, max_data) \
(iov).iov_base = \
(index)->mcbmi_data + \
(rank * mca_coll_sm_component.sm_fragment_size); \
(max_data) = (iov).iov_len = mca_coll_sm_component.sm_fragment_size; \
ompi_convertor_pack(&(convertor), &(iov), &mca_coll_sm_iov_size, \
&(max_data), &mca_coll_sm_bogus_free_after);
/**
* Macro to copy a single segment out from a shared segment to a user
* buffer
*/
#define COPY_FRAGMENT_OUT(convertor, src_rank, index, iov, max_data) \
(iov).iov_base = (((char*) (index)->mcbmi_data) + \
((src_rank) * mca_coll_sm_component.sm_fragment_size)); \
ompi_convertor_unpack(&(convertor), &(iov), &mca_coll_sm_iov_size, \
&(max_data), &mca_coll_sm_bogus_free_after);
/**
* Macro to memcpy a fragment between one shared segment and another
*/
#define COPY_FRAGMENT_BETWEEN(src_rank, dest_rank, index, len) \
memcpy(((index)->mcbmi_data + \
((dest_rank) * mca_coll_sm_component.sm_fragment_size)), \
((index)->mcbmi_data + \
((src_rank) * \
mca_coll_sm_component.sm_fragment_size)), \
(len));
/**
* Macro to tell children that a segment is ready (normalize the
* child's ID based on the shift used to calculate the "me" node in
* the tree)
*/
#define PARENT_NOTIFY_CHILDREN(children, num_children, index, value) \
for (i = 0; i < (num_children); ++i) { \
*((size_t*) \
(((char*) index->mcbmi_control) + \
(mca_coll_sm_component.sm_control_size * \
(((children)[i]->mcstn_id + root) % size)))) = (value); \
}
/**
* Macro for childen to wait for parent notification (use real rank).
* Save the value passed and then reset it when done.
*/
#define CHILD_WAIT_FOR_NOTIFY(rank, index, value) \
while (0 == *((volatile uint32_t*) \
(((char*) index->mcbmi_control) + \
((rank) * mca_coll_sm_component.sm_control_size)))) { \
continue; \
} \
(value) = *((volatile uint32_t*) \
(((char*) index->mcbmi_control) + \
((rank) * mca_coll_sm_component.sm_control_size))); \
*((uint32_t*) (((char*) index->mcbmi_control) + \
((rank) * mca_coll_sm_component.sm_control_size))) = 0;
#endif /* MCA_COLL_SM_EXPORT_H */

Просмотреть файл

@ -46,17 +46,15 @@ int mca_coll_sm_bcast_intra(void *buff, int count,
struct ompi_communicator_t *comm)
{
struct iovec iov;
uint32_t iov_size = 1;
mca_coll_base_comm_t *data = comm->c_coll_selected_data;
int i, ret, rank, size, num_children;
int i, ret, rank, size, num_children, src_rank;
int flag_num, segment_num, max_segment_num;
int parent_rank, child_rank;
int parent_rank;
size_t total_size, max_data, bytes;
volatile uint32_t *my_control;
mca_coll_sm_in_use_flag_t *flag;
ompi_convertor_t convertor;
mca_coll_sm_tree_node_t *me, *parent, **children;
int32_t bogus_free_after = 0;
mca_coll_base_mpool_index_t *index;
/* Setup some identities */
@ -106,27 +104,9 @@ int mca_coll_sm_bcast_intra(void *buff, int count,
flag_num = (data->mcb_operation_count++ %
mca_coll_sm_component.sm_comm_num_in_use_flags);
/* Wait for the set of segments to become available */
flag = (mca_coll_sm_in_use_flag_t*)
(((char *) data->mcb_in_use_flags) +
(flag_num * mca_coll_sm_component.sm_control_size));
D(("root waiting for in_use flag %d (value %d), %p\n",
flag_num, flag->mcsiuf_num_procs_using, flag));
while (0 != flag->mcsiuf_num_procs_using) {
continue;
}
D(("root got in_use flag %d (value %d), %p\n",
flag_num, flag->mcsiuf_num_procs_using, flag));
/* Now that the set of segments is availble, mark it as
used. No need to include the root in the count (we'd
only have to decrement it later). Don't need a write
barrier here -- we have another later that will
guarantee that the write has completed, if
necessary. */
flag->mcsiuf_num_procs_using = size - 1;
flag->mcsiuf_operation_count = data->mcb_operation_count - 1;
FLAG_SETUP(flag_num, flag, data);
FLAG_WAIT_FOR_IDLE(flag);
FLAG_RETAIN(flag, size - 1, data->mcb_operation_count - 1);
/* Loop over all the segments in this set */
@ -135,40 +115,19 @@ int mca_coll_sm_bcast_intra(void *buff, int count,
max_segment_num = (flag_num + 1) *
mca_coll_sm_component.sm_comm_num_in_use_flags;
do {
index = &(data->mcb_mpool_index[segment_num]);
/* Copy the fragment from the user buffer to my fragment
in the current segment */
iov.iov_base =
data->mcb_mpool_index[segment_num].mcbmi_data +
(rank * mca_coll_sm_component.sm_fragment_size);
max_data = iov.iov_len;
D(("root copying %lu bytes to data fan out, seg %d: %p\n",
(unsigned long) iov.iov_len, segment_num, iov.iov_base));
ompi_convertor_pack(&convertor, &iov, &iov_size,
&max_data, &bogus_free_after);
COPY_FRAGMENT_IN(convertor, index, iov, max_data);
bytes += max_data;
/* Wait for the write to absolutely complete */
opal_atomic_wmb();
/* Tell my children that this fragment is ready (be
sure to normalize the child's ID based on the shift
we did above to calculate the "me" node in the
tree) */
for (i = 0; i < num_children; ++i) {
child_rank = (children[i]->mcstn_id + root) % size;
*((size_t*)
(((char*)
data->mcb_mpool_index[segment_num].mcbmi_control) +
(mca_coll_sm_component.sm_control_size *
child_rank))) = max_data;
D(("root sent notice to child %d (vrank %d), control to %p\n",
i, children[i]->mcstn_id,
(((char*)
data->mcb_mpool_index[segment_num].mcbmi_control) +
(mca_coll_sm_component.sm_control_size *
child_rank))));
}
/* Tell my children that this fragment is ready */
PARENT_NOTIFY_CHILDREN(children, num_children, index,
max_data);
++segment_num;
} while (bytes < total_size && segment_num < max_segment_num);
@ -207,13 +166,8 @@ int mca_coll_sm_bcast_intra(void *buff, int count,
/* Wait for the root to mark this set of segments as
ours */
flag = (mca_coll_sm_in_use_flag_t*)
(((char *) data->mcb_in_use_flags) +
(flag_num * mca_coll_sm_component.sm_control_size));
D(("rank %d waiting for root to claim in-use flag %d, %p (op count %d)\n", rank, flag_num, flag, data->mcb_operation_count));
while (data->mcb_operation_count != flag->mcsiuf_operation_count) {
continue;
}
FLAG_SETUP(flag_num, flag, data);
FLAG_WAIT_FOR_OP(flag, data->mcb_operation_count);
++data->mcb_operation_count;
/* Loop over all the segments in this set */
@ -224,71 +178,32 @@ int mca_coll_sm_bcast_intra(void *buff, int count,
mca_coll_sm_component.sm_comm_num_in_use_flags;
do {
/* Pre-calculate some pointers */
/* Pre-calculate some values */
parent_rank = (parent->mcstn_id + root) % size;
D(("rank %d parent rank is %d\n", rank, parent_rank));
my_control = (uint32_t *)
(((char*)
data->mcb_mpool_index[segment_num].mcbmi_control) +
(rank * mca_coll_sm_component.sm_control_size));
index = &(data->mcb_mpool_index[segment_num]);
/* Wait for the fragment: the parent will mark the segment
as ready */
D(("rank %d waiting for fragment in segment %d (control %p)\n",
rank, segment_num, (char*) my_control));
while (0 == *my_control) {
continue;
}
max_data = *my_control;
D(("rank %d: fragment ready in segment %d\n", rank, segment_num));
/* Wait for my parent to tell me that the segment is ready */
CHILD_WAIT_FOR_NOTIFY(rank, index, max_data);
/* If I have children, send the data to them */
if (num_children > 0) {
/* No need to wait for the segment to become
available -- the root has already claimed it
and we're all already using it. So copy the
fragment from the parent's portion in the
segment to my portion in the segment. This is
a simply memcpy because it's already been
packed into the parent's segment. */
memcpy(/* my data fan out section in the segment */
(data->mcb_mpool_index[segment_num].mcbmi_data +
(rank * mca_coll_sm_component.sm_fragment_size)),
/* parent's fan out section in the segment */
(data->mcb_mpool_index[segment_num].mcbmi_data +
(parent_rank *
mca_coll_sm_component.sm_fragment_size)),
/* length */
*my_control);
D(("rank %d memcopy'ed fragment (%p) to my data fan out (%lu bytes)\n", rank,
(data->mcb_mpool_index[segment_num].mcbmi_data +
(parent_rank * mca_coll_sm_component.sm_fragment_size)),
(unsigned long) *my_control));
/* Copy the fragment from the parent's portion in
the segment to my portion in the segment. */
COPY_FRAGMENT_BETWEEN(parent_rank, rank, index, max_data);
/* Wait for the write to absolutely complete */
opal_atomic_wmb();
/* Tell my children that this fragment is ready */
for (i = 0; i < num_children; ++i) {
child_rank = (children[i]->mcstn_id + root) % size;
*((size_t*)
(((char*)
data->mcb_mpool_index[segment_num].mcbmi_control) +
(mca_coll_sm_component.sm_control_size *
child_rank))) = *my_control;
D(("rank %d notifying child %d (vrank %d, rank %d)\n",
rank, i, children[i]->mcstn_id, child_rank));
}
PARENT_NOTIFY_CHILDREN(children, num_children, index,
max_data);
/* Set the "copy from buffer" to be my local
segment buffer so that we don't potentially
incur a non-local memory copy from the parent's
fan out data segment [again] when copying to
the user's buffer */
iov.iov_base =
data->mcb_mpool_index[segment_num].mcbmi_data +
(rank * mca_coll_sm_component.sm_fragment_size);
src_rank = rank;
}
/* If I don't have any children, set the "copy from
@ -296,20 +211,12 @@ int mca_coll_sm_bcast_intra(void *buff, int count,
directly from my parent */
else {
iov.iov_base =
(((char*)
data->mcb_mpool_index[segment_num].mcbmi_data) +
(parent_rank *
mca_coll_sm_component.sm_fragment_size));
src_rank = parent_rank;
}
/* Copy to my output buffer */
D(("rank %d convertor copied from parent data %p to user buffer (%lu bytes)\n",
rank, iov.iov_base, (unsigned long) iov.iov_len));
ompi_convertor_unpack(&convertor, &iov, &iov_size,
&max_data, &bogus_free_after);
COPY_FRAGMENT_OUT(convertor, src_rank, index, iov, max_data);
*my_control = 0;
bytes += max_data;
++segment_num;
} while (bytes < total_size && segment_num < max_segment_num);
@ -319,10 +226,7 @@ int mca_coll_sm_bcast_intra(void *buff, int count,
opal_atomic_wmb();
/* We're finished with this set of segments */
D(("rank %d done with in-use flag %d (value %d)\n",
rank, flag_num, flag->mcsiuf_num_procs_using));
opal_atomic_add(&flag->mcsiuf_num_procs_using, -1);
FLAG_RELEASE(flag);
} while (bytes < total_size);
}

Просмотреть файл

@ -44,6 +44,13 @@
#include "coll_sm.h"
/*
* Global variables
*/
uint32_t mca_coll_sm_iov_size = 1;
int32_t mca_coll_sm_bogus_free_after = 0;
/*
* Local functions
*/