- Remove all vestiges of using the built-in mcb_tree from the
  reduce_inorder() function -- we don't use the tree at all.
- Add more relevant "volatile" qualifiers for the control buffers in the
  fragment mpool (and associated casts where necessary).

This commit was SVN r7616.
Этот коммит содержится в:
родитель
9a67831ba3
Коммит
b17c4334c4
@ -242,7 +242,7 @@ extern "C" {
|
||||
*/
|
||||
struct mca_coll_base_mpool_index_t {
|
||||
/** Pointer to beginning of control data */
|
||||
uint32_t *mcbmi_control;
|
||||
uint32_t volatile *mcbmi_control;
|
||||
/** Pointer to beginning of message fragment data */
|
||||
char *mcbmi_data;
|
||||
};
|
||||
@ -514,7 +514,7 @@ extern int32_t mca_coll_sm_bogus_free_after;
|
||||
*/
|
||||
#define CHILD_WAIT_FOR_NOTIFY(rank, index, value) \
|
||||
do { \
|
||||
volatile uint32_t *ptr = ((uint32_t*) \
|
||||
uint32_t volatile *ptr = ((uint32_t*) \
|
||||
(((char*) index->mcbmi_control) + \
|
||||
((rank) * mca_coll_sm_component.sm_control_size))); \
|
||||
while (0 == *ptr) SPIN; \
|
||||
@ -527,7 +527,7 @@ extern int32_t mca_coll_sm_bogus_free_after;
|
||||
* segment. Used for fan in operations.
|
||||
*/
|
||||
#define CHILD_NOTIFY_PARENT(child_rank, parent_rank, index, value) \
|
||||
((size_t*) \
|
||||
((size_t volatile *) \
|
||||
(((char*) (index)->mcbmi_control) + \
|
||||
(mca_coll_sm_component.sm_control_size * \
|
||||
(parent_rank))))[(child_rank)] = (value)
|
||||
@ -539,7 +539,7 @@ extern int32_t mca_coll_sm_bogus_free_after;
|
||||
*/
|
||||
#define PARENT_WAIT_FOR_NOTIFY_SPECIFIC(child_rank, parent_rank, index, value) \
|
||||
do { \
|
||||
volatile size_t *ptr = ((size_t *) \
|
||||
size_t volatile *ptr = ((size_t volatile *) \
|
||||
(((char*) index->mcbmi_control) + \
|
||||
(mca_coll_sm_component.sm_control_size * \
|
||||
(parent_rank)))) + child_rank; \
|
||||
|
@ -405,9 +405,9 @@ sm_module_init(struct ompi_communicator_t *comm)
|
||||
/* Memory affinity: control */
|
||||
|
||||
maffinity[j].mbs_len = c->sm_control_size;
|
||||
maffinity[j].mbs_start_addr =
|
||||
data->mcb_mpool_index[i].mcbmi_control +
|
||||
(rank * c->sm_control_size);
|
||||
maffinity[j].mbs_start_addr = (void *)
|
||||
(data->mcb_mpool_index[i].mcbmi_control +
|
||||
(rank * c->sm_control_size));
|
||||
++j;
|
||||
|
||||
/* Memory affinity: data */
|
||||
@ -430,7 +430,7 @@ sm_module_init(struct ompi_communicator_t *comm)
|
||||
memset(data->mcb_barrier_control_me, 0,
|
||||
num_barrier_buffers * 2 * c->sm_control_size);
|
||||
for (i = 0; i < c->sm_comm_num_segments; ++i) {
|
||||
memset(data->mcb_mpool_index[i].mcbmi_control, 0,
|
||||
memset((void *) data->mcb_mpool_index[i].mcbmi_control, 0,
|
||||
c->sm_control_size);
|
||||
}
|
||||
|
||||
|
@ -147,7 +147,6 @@ static int reduce_inorder(void *sbuf, void* rbuf, int count,
|
||||
mca_coll_sm_in_use_flag_t *flag;
|
||||
ompi_convertor_t convertor;
|
||||
mca_coll_base_mpool_index_t *index;
|
||||
mca_coll_sm_tree_node_t *me;
|
||||
int32_t ddt_size;
|
||||
size_t segment_ddt_count, segment_ddt_bytes, zero = 0;
|
||||
|
||||
@ -156,8 +155,6 @@ static int reduce_inorder(void *sbuf, void* rbuf, int count,
|
||||
rank = ompi_comm_rank(comm);
|
||||
size = ompi_comm_size(comm);
|
||||
|
||||
me = &data->mcb_tree[(rank + size - root) % size];
|
||||
|
||||
/* Figure out how much we should have the convertor copy. We need
|
||||
to have it be in units of a datatype -- i.e., we only want to
|
||||
copy a whole datatype worth of data or none at all (we've
|
||||
@ -255,10 +252,8 @@ static int reduce_inorder(void *sbuf, void* rbuf, int count,
|
||||
/* Main loop over receiving / reducing fragments */
|
||||
|
||||
do {
|
||||
|
||||
flag_num = (data->mcb_operation_count %
|
||||
mca_coll_sm_component.sm_comm_num_in_use_flags);
|
||||
|
||||
FLAG_SETUP(flag_num, flag, data);
|
||||
FLAG_WAIT_FOR_IDLE(flag);
|
||||
FLAG_RETAIN(flag, size, data->mcb_operation_count);
|
||||
@ -417,7 +412,6 @@ static int reduce_inorder(void *sbuf, void* rbuf, int count,
|
||||
*********************************************************************/
|
||||
|
||||
else {
|
||||
int parent_rank = (me->mcstn_parent->mcstn_id + root) % size;
|
||||
|
||||
/* Here we get a convertor for the full count that the user
|
||||
provided (as opposed to the convertor that the root got) */
|
||||
@ -454,7 +448,7 @@ static int reduce_inorder(void *sbuf, void* rbuf, int count,
|
||||
do {
|
||||
index = &(data->mcb_mpool_index[segment_num]);
|
||||
|
||||
/* Copy from the user's buffer to the shared mem
|
||||
/* Copy from the user's buffer to my shared mem
|
||||
segment */
|
||||
COPY_FRAGMENT_IN(convertor, index, rank, iov, max_data);
|
||||
bytes += max_data;
|
||||
@ -462,8 +456,10 @@ static int reduce_inorder(void *sbuf, void* rbuf, int count,
|
||||
/* Wait for the write to absolutely complete */
|
||||
opal_atomic_wmb();
|
||||
|
||||
/* Tell my parent that this fragment is ready */
|
||||
CHILD_NOTIFY_PARENT(rank, parent_rank, index, max_data);
|
||||
/* Tell my parent (always the reduction root -- we're
|
||||
ignoring the mcb_tree parent/child relationships
|
||||
here) that this fragment is ready */
|
||||
CHILD_NOTIFY_PARENT(rank, root, index, max_data);
|
||||
|
||||
++segment_num;
|
||||
} while (bytes < total_size && segment_num < max_segment_num);
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user