- Remove all vestiges of using the built-in mcb_tree from the
reduce_inorder() function -- we don't use the tree at all.
- Add more relevant "volatile" qualifiers for the control buffers in the fragment mpool (and associated casts where necessary).

This commit was SVN r7616.
Этот коммит содержится в:
родитель
9a67831ba3
Коммит
b17c4334c4
@ -242,7 +242,7 @@ extern "C" {
|
|||||||
*/
|
*/
|
||||||
struct mca_coll_base_mpool_index_t {
|
struct mca_coll_base_mpool_index_t {
|
||||||
/** Pointer to beginning of control data */
|
/** Pointer to beginning of control data */
|
||||||
uint32_t *mcbmi_control;
|
uint32_t volatile *mcbmi_control;
|
||||||
/** Pointer to beginning of message fragment data */
|
/** Pointer to beginning of message fragment data */
|
||||||
char *mcbmi_data;
|
char *mcbmi_data;
|
||||||
};
|
};
|
||||||
@ -514,7 +514,7 @@ extern int32_t mca_coll_sm_bogus_free_after;
|
|||||||
*/
|
*/
|
||||||
#define CHILD_WAIT_FOR_NOTIFY(rank, index, value) \
|
#define CHILD_WAIT_FOR_NOTIFY(rank, index, value) \
|
||||||
do { \
|
do { \
|
||||||
volatile uint32_t *ptr = ((uint32_t*) \
|
uint32_t volatile *ptr = ((uint32_t*) \
|
||||||
(((char*) index->mcbmi_control) + \
|
(((char*) index->mcbmi_control) + \
|
||||||
((rank) * mca_coll_sm_component.sm_control_size))); \
|
((rank) * mca_coll_sm_component.sm_control_size))); \
|
||||||
while (0 == *ptr) SPIN; \
|
while (0 == *ptr) SPIN; \
|
||||||
@ -527,7 +527,7 @@ extern int32_t mca_coll_sm_bogus_free_after;
|
|||||||
* segment. Used for fan in operations.
|
* segment. Used for fan in operations.
|
||||||
*/
|
*/
|
||||||
#define CHILD_NOTIFY_PARENT(child_rank, parent_rank, index, value) \
|
#define CHILD_NOTIFY_PARENT(child_rank, parent_rank, index, value) \
|
||||||
((size_t*) \
|
((size_t volatile *) \
|
||||||
(((char*) (index)->mcbmi_control) + \
|
(((char*) (index)->mcbmi_control) + \
|
||||||
(mca_coll_sm_component.sm_control_size * \
|
(mca_coll_sm_component.sm_control_size * \
|
||||||
(parent_rank))))[(child_rank)] = (value)
|
(parent_rank))))[(child_rank)] = (value)
|
||||||
@ -539,7 +539,7 @@ extern int32_t mca_coll_sm_bogus_free_after;
|
|||||||
*/
|
*/
|
||||||
#define PARENT_WAIT_FOR_NOTIFY_SPECIFIC(child_rank, parent_rank, index, value) \
|
#define PARENT_WAIT_FOR_NOTIFY_SPECIFIC(child_rank, parent_rank, index, value) \
|
||||||
do { \
|
do { \
|
||||||
volatile size_t *ptr = ((size_t *) \
|
size_t volatile *ptr = ((size_t volatile *) \
|
||||||
(((char*) index->mcbmi_control) + \
|
(((char*) index->mcbmi_control) + \
|
||||||
(mca_coll_sm_component.sm_control_size * \
|
(mca_coll_sm_component.sm_control_size * \
|
||||||
(parent_rank)))) + child_rank; \
|
(parent_rank)))) + child_rank; \
|
||||||
|
@ -405,9 +405,9 @@ sm_module_init(struct ompi_communicator_t *comm)
|
|||||||
/* Memory affinity: control */
|
/* Memory affinity: control */
|
||||||
|
|
||||||
maffinity[j].mbs_len = c->sm_control_size;
|
maffinity[j].mbs_len = c->sm_control_size;
|
||||||
maffinity[j].mbs_start_addr =
|
maffinity[j].mbs_start_addr = (void *)
|
||||||
data->mcb_mpool_index[i].mcbmi_control +
|
(data->mcb_mpool_index[i].mcbmi_control +
|
||||||
(rank * c->sm_control_size);
|
(rank * c->sm_control_size));
|
||||||
++j;
|
++j;
|
||||||
|
|
||||||
/* Memory affinity: data */
|
/* Memory affinity: data */
|
||||||
@ -430,7 +430,7 @@ sm_module_init(struct ompi_communicator_t *comm)
|
|||||||
memset(data->mcb_barrier_control_me, 0,
|
memset(data->mcb_barrier_control_me, 0,
|
||||||
num_barrier_buffers * 2 * c->sm_control_size);
|
num_barrier_buffers * 2 * c->sm_control_size);
|
||||||
for (i = 0; i < c->sm_comm_num_segments; ++i) {
|
for (i = 0; i < c->sm_comm_num_segments; ++i) {
|
||||||
memset(data->mcb_mpool_index[i].mcbmi_control, 0,
|
memset((void *) data->mcb_mpool_index[i].mcbmi_control, 0,
|
||||||
c->sm_control_size);
|
c->sm_control_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -147,7 +147,6 @@ static int reduce_inorder(void *sbuf, void* rbuf, int count,
|
|||||||
mca_coll_sm_in_use_flag_t *flag;
|
mca_coll_sm_in_use_flag_t *flag;
|
||||||
ompi_convertor_t convertor;
|
ompi_convertor_t convertor;
|
||||||
mca_coll_base_mpool_index_t *index;
|
mca_coll_base_mpool_index_t *index;
|
||||||
mca_coll_sm_tree_node_t *me;
|
|
||||||
int32_t ddt_size;
|
int32_t ddt_size;
|
||||||
size_t segment_ddt_count, segment_ddt_bytes, zero = 0;
|
size_t segment_ddt_count, segment_ddt_bytes, zero = 0;
|
||||||
|
|
||||||
@ -156,8 +155,6 @@ static int reduce_inorder(void *sbuf, void* rbuf, int count,
|
|||||||
rank = ompi_comm_rank(comm);
|
rank = ompi_comm_rank(comm);
|
||||||
size = ompi_comm_size(comm);
|
size = ompi_comm_size(comm);
|
||||||
|
|
||||||
me = &data->mcb_tree[(rank + size - root) % size];
|
|
||||||
|
|
||||||
/* Figure out how much we should have the convertor copy. We need
|
/* Figure out how much we should have the convertor copy. We need
|
||||||
to have it be in units of a datatype -- i.e., we only want to
|
to have it be in units of a datatype -- i.e., we only want to
|
||||||
copy a whole datatype worth of data or none at all (we've
|
copy a whole datatype worth of data or none at all (we've
|
||||||
@ -255,10 +252,8 @@ static int reduce_inorder(void *sbuf, void* rbuf, int count,
|
|||||||
/* Main loop over receiving / reducing fragments */
|
/* Main loop over receiving / reducing fragments */
|
||||||
|
|
||||||
do {
|
do {
|
||||||
|
|
||||||
flag_num = (data->mcb_operation_count %
|
flag_num = (data->mcb_operation_count %
|
||||||
mca_coll_sm_component.sm_comm_num_in_use_flags);
|
mca_coll_sm_component.sm_comm_num_in_use_flags);
|
||||||
|
|
||||||
FLAG_SETUP(flag_num, flag, data);
|
FLAG_SETUP(flag_num, flag, data);
|
||||||
FLAG_WAIT_FOR_IDLE(flag);
|
FLAG_WAIT_FOR_IDLE(flag);
|
||||||
FLAG_RETAIN(flag, size, data->mcb_operation_count);
|
FLAG_RETAIN(flag, size, data->mcb_operation_count);
|
||||||
@ -417,7 +412,6 @@ static int reduce_inorder(void *sbuf, void* rbuf, int count,
|
|||||||
*********************************************************************/
|
*********************************************************************/
|
||||||
|
|
||||||
else {
|
else {
|
||||||
int parent_rank = (me->mcstn_parent->mcstn_id + root) % size;
|
|
||||||
|
|
||||||
/* Here we get a convertor for the full count that the user
|
/* Here we get a convertor for the full count that the user
|
||||||
provided (as opposed to the convertor that the root got) */
|
provided (as opposed to the convertor that the root got) */
|
||||||
@ -454,7 +448,7 @@ static int reduce_inorder(void *sbuf, void* rbuf, int count,
|
|||||||
do {
|
do {
|
||||||
index = &(data->mcb_mpool_index[segment_num]);
|
index = &(data->mcb_mpool_index[segment_num]);
|
||||||
|
|
||||||
/* Copy from the user's buffer to the shared mem
|
/* Copy from the user's buffer to my shared mem
|
||||||
segment */
|
segment */
|
||||||
COPY_FRAGMENT_IN(convertor, index, rank, iov, max_data);
|
COPY_FRAGMENT_IN(convertor, index, rank, iov, max_data);
|
||||||
bytes += max_data;
|
bytes += max_data;
|
||||||
@ -462,8 +456,10 @@ static int reduce_inorder(void *sbuf, void* rbuf, int count,
|
|||||||
/* Wait for the write to absolutely complete */
|
/* Wait for the write to absolutely complete */
|
||||||
opal_atomic_wmb();
|
opal_atomic_wmb();
|
||||||
|
|
||||||
/* Tell my parent that this fragment is ready */
|
/* Tell my parent (always the reduction root -- we're
|
||||||
CHILD_NOTIFY_PARENT(rank, parent_rank, index, max_data);
|
ignoring the mcb_tree parent/child relationships
|
||||||
|
here) that this fragment is ready */
|
||||||
|
CHILD_NOTIFY_PARENT(rank, root, index, max_data);
|
||||||
|
|
||||||
++segment_num;
|
++segment_num;
|
||||||
} while (bytes < total_size && segment_num < max_segment_num);
|
} while (bytes < total_size && segment_num < max_segment_num);
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user