coll-portals4: No more messages passed to Portals4 bigger than the limit given by PtlNIInit
Этот коммит содержится в:
родитель
175e6aa385
Коммит
a7e3de6c4f
@ -65,6 +65,7 @@ struct mca_coll_portals4_component_t {
|
|||||||
opal_free_list_t requests; /* request free list for the i collectives */
|
opal_free_list_t requests; /* request free list for the i collectives */
|
||||||
|
|
||||||
ptl_ni_limits_t ni_limits;
|
ptl_ni_limits_t ni_limits;
|
||||||
|
ptl_size_t portals_max_msg_size;
|
||||||
|
|
||||||
int use_binomial_gather_algorithm;
|
int use_binomial_gather_algorithm;
|
||||||
|
|
||||||
|
@ -89,12 +89,20 @@ static int prepare_bcast_data (struct ompi_communicator_t *comm,
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Number of segments */
|
/* Number of segments */
|
||||||
request->u.bcast.segment_nb = (request->u.bcast.tmpsize > COLL_PORTALS4_MAX_BW) ?
|
{
|
||||||
(((request->u.bcast.tmpsize + COLL_PORTALS4_MAX_BW -1) / COLL_PORTALS4_MAX_BW) < COLL_PORTALS4_MAX_SEGMENT ?
|
size_t max_msg_size = (COLL_PORTALS4_MAX_BW > mca_coll_portals4_component.ni_limits.max_msg_size) ?
|
||||||
((request->u.bcast.tmpsize + COLL_PORTALS4_MAX_BW -1) / COLL_PORTALS4_MAX_BW) :
|
mca_coll_portals4_component.ni_limits.max_msg_size :
|
||||||
COLL_PORTALS4_MAX_SEGMENT) :
|
COLL_PORTALS4_MAX_BW;
|
||||||
|
|
||||||
|
// TODO: Either make the Portals size limits compatible with COLL_PORTALS4_MAX_SEGMENT, or remove COLL_PORTALS4_MAX_SEGMENT
|
||||||
|
request->u.bcast.segment_nb = (request->u.bcast.tmpsize > max_msg_size) ?
|
||||||
|
(((request->u.bcast.tmpsize + max_msg_size -1) / max_msg_size) < COLL_PORTALS4_MAX_SEGMENT ?
|
||||||
|
((request->u.bcast.tmpsize + max_msg_size -1) / max_msg_size) : COLL_PORTALS4_MAX_SEGMENT) :
|
||||||
1;
|
1;
|
||||||
|
|
||||||
|
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
|
||||||
|
"seg_number=%d , seg_size_max=%lu", request->u.bcast.segment_nb, max_msg_size));
|
||||||
|
}
|
||||||
if (request->u.bcast.segment_nb > COLL_PORTALS4_BCAST_ALGO_THRESHOLD) {
|
if (request->u.bcast.segment_nb > COLL_PORTALS4_BCAST_ALGO_THRESHOLD) {
|
||||||
request->u.bcast.algo = OMPI_COLL_PORTALS4_BCAST_PIPELINE_ALGO;
|
request->u.bcast.algo = OMPI_COLL_PORTALS4_BCAST_PIPELINE_ALGO;
|
||||||
}
|
}
|
||||||
@ -361,6 +369,8 @@ bcast_kary_tree_top(void *buff, int count,
|
|||||||
/* Divide buffer into segments */
|
/* Divide buffer into segments */
|
||||||
if (seg <= nb_long) length = seg_size + 1;
|
if (seg <= nb_long) length = seg_size + 1;
|
||||||
else length = seg_size;
|
else length = seg_size;
|
||||||
|
opal_output_verbose(10, ompi_coll_base_framework.framework_output,
|
||||||
|
"bcast with k-ary tree : segment of size %ld", length);
|
||||||
|
|
||||||
/* compute the triggering threshold to send data to the children */
|
/* compute the triggering threshold to send data to the children */
|
||||||
trig_thr = segment_nb + seg - 1; /* To be sure the set of PtlTriggeredPut of DATA will be executed in order */
|
trig_thr = segment_nb + seg - 1; /* To be sure the set of PtlTriggeredPut of DATA will be executed in order */
|
||||||
@ -687,6 +697,8 @@ bcast_pipeline_top(void *buff, int count,
|
|||||||
/* Divide buffer into segments */
|
/* Divide buffer into segments */
|
||||||
if (seg <= nb_long) length = seg_size + 1;
|
if (seg <= nb_long) length = seg_size + 1;
|
||||||
else length = seg_size;
|
else length = seg_size;
|
||||||
|
opal_output_verbose(10, ompi_coll_base_framework.framework_output,
|
||||||
|
"bcast with pipeline : segment of size %ld \n", length);
|
||||||
|
|
||||||
/* compute the triggering threshold to send data to the children */
|
/* compute the triggering threshold to send data to the children */
|
||||||
trig_thr = segment_nb + seg - 1; /* To be sure the PtlTriggeredPut will be executed in order */
|
trig_thr = segment_nb + seg - 1; /* To be sure the PtlTriggeredPut will be executed in order */
|
||||||
|
@ -211,6 +211,16 @@ portals4_register(void)
|
|||||||
MCA_BASE_VAR_SCOPE_READONLY,
|
MCA_BASE_VAR_SCOPE_READONLY,
|
||||||
&mca_coll_portals4_component.use_binomial_gather_algorithm);
|
&mca_coll_portals4_component.use_binomial_gather_algorithm);
|
||||||
|
|
||||||
|
mca_coll_portals4_component.portals_max_msg_size = PTL_SIZE_MAX;
|
||||||
|
(void) mca_base_component_var_register(&mca_coll_portals4_component.super.collm_version,
|
||||||
|
"max_msg_size",
|
||||||
|
"Max size supported by portals4 (above that, a message is cut into messages less than that size)",
|
||||||
|
MCA_BASE_VAR_TYPE_UNSIGNED_LONG,
|
||||||
|
NULL, 0, 0,
|
||||||
|
OPAL_INFO_LVL_9,
|
||||||
|
MCA_BASE_VAR_SCOPE_READONLY,
|
||||||
|
&mca_coll_portals4_component.portals_max_msg_size);
|
||||||
|
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -369,7 +379,13 @@ portals4_init_query(bool enable_progress_threads,
|
|||||||
__FILE__, __LINE__, ret);
|
__FILE__, __LINE__, ret);
|
||||||
return OMPI_ERROR;
|
return OMPI_ERROR;
|
||||||
}
|
}
|
||||||
|
opal_output_verbose(10, ompi_coll_base_framework.framework_output,
|
||||||
|
"ni_limits.max_atomic_size=%ld", mca_coll_portals4_component.ni_limits.max_atomic_size);
|
||||||
|
|
||||||
|
if (mca_coll_portals4_component.portals_max_msg_size < mca_coll_portals4_component.ni_limits.max_msg_size)
|
||||||
|
mca_coll_portals4_component.ni_limits.max_msg_size = mca_coll_portals4_component.portals_max_msg_size;
|
||||||
|
opal_output_verbose(10, ompi_coll_base_framework.framework_output,
|
||||||
|
"ni_limits.max_msg_size=%lu", mca_coll_portals4_component.ni_limits.max_msg_size);
|
||||||
|
|
||||||
ret = PtlGetId(mca_coll_portals4_component.ni_h, &mca_coll_portals4_component.id);
|
ret = PtlGetId(mca_coll_portals4_component.ni_h, &mca_coll_portals4_component.id);
|
||||||
if (PTL_OK != ret) {
|
if (PTL_OK != ret) {
|
||||||
|
@ -21,6 +21,7 @@
|
|||||||
#include "coll_portals4.h"
|
#include "coll_portals4.h"
|
||||||
#include "coll_portals4_request.h"
|
#include "coll_portals4_request.h"
|
||||||
|
|
||||||
|
#include <string.h> // included for ffs in get_tree_numdescendants_of
|
||||||
|
|
||||||
#undef RTR_USES_TRIGGERED_PUT
|
#undef RTR_USES_TRIGGERED_PUT
|
||||||
|
|
||||||
@ -55,6 +56,22 @@
|
|||||||
* |
|
* |
|
||||||
* 15
|
* 15
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
/*
 * Return the number of descendants of virtual rank `vrank` in the
 * binomial gather tree spanning `comm`.
 *
 * comm   communicator whose size bounds the tree
 * vrank  virtual rank (rank relative to the root; 0 is the root)
 *
 * The root (vrank 0) has every other rank below it.  For any other
 * vrank, a complete binomial subtree contains as many nodes as the value
 * of the lowest set bit of vrank; the subtree is truncated when it would
 * run past the communicator size.  The node itself is not counted.
 *
 * NOTE(review): this assumes the tree layout in which the children of v
 * are v + 2^k for every 2^k below v's lowest set bit -- confirm against
 * ompi_coll_portals4_build_in_order_bmtree.
 */
static int32_t get_tree_numdescendants_of(struct ompi_communicator_t* comm,
                                          int vrank)
{
    int max;
    int size = ompi_comm_size(comm);

    if (0 == vrank) {
        return size - 1;
    }

    /* ffs() returns a 1-based bit position, so ffs(vrank) - 1 is the index
     * of the lowest set bit and max is the size of a full subtree rooted
     * at vrank (the node included). */
    max = 1 << (ffs(vrank) - 1);

    /* Clip the subtree at the end of the communicator, then drop the node
     * itself to keep only its descendants. */
    return ((vrank + max <= size) ? max : size - vrank) - 1;
}
|
||||||
|
|
||||||
static ompi_coll_portals4_tree_t*
|
static ompi_coll_portals4_tree_t*
|
||||||
ompi_coll_portals4_build_in_order_bmtree( struct ompi_communicator_t* comm,
|
ompi_coll_portals4_build_in_order_bmtree( struct ompi_communicator_t* comm,
|
||||||
int root )
|
int root )
|
||||||
@ -506,8 +523,10 @@ ompi_coll_portals4_gather_intra_binomial_top(const void *sbuf, int scount, struc
|
|||||||
int32_t expected_ops =0;
|
int32_t expected_ops =0;
|
||||||
int32_t expected_acks=0;
|
int32_t expected_acks=0;
|
||||||
|
|
||||||
|
ptl_size_t number_of_fragment_gathered = 0;
|
||||||
|
ptl_size_t number_of_fragment_send = 1;
|
||||||
|
|
||||||
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
|
||||||
"coll:portals4:gather_intra_binomial_top enter rank %d", request->u.gather.my_rank));
|
"coll:portals4:gather_intra_binomial_top enter rank %d", request->u.gather.my_rank));
|
||||||
|
|
||||||
request->type = OMPI_COLL_PORTALS4_TYPE_GATHER;
|
request->type = OMPI_COLL_PORTALS4_TYPE_GATHER;
|
||||||
@ -579,6 +598,23 @@ ompi_coll_portals4_gather_intra_binomial_top(const void *sbuf, int scount, struc
|
|||||||
ret = setup_sync_handles(comm, request, portals4_module);
|
ret = setup_sync_handles(comm, request, portals4_module);
|
||||||
if (MPI_SUCCESS != ret) { line = __LINE__; goto err_hdlr; }
|
if (MPI_SUCCESS != ret) { line = __LINE__; goto err_hdlr; }
|
||||||
|
|
||||||
|
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
|
||||||
|
"%s:%d: packed_size=%lu, fragment_size=%lu",
|
||||||
|
__FILE__, __LINE__, request->u.gather.packed_size, mca_coll_portals4_component.ni_limits.max_msg_size));
|
||||||
|
|
||||||
|
for (int i =0; i < bmtree->tree_nextsize; i++) {
|
||||||
|
int child_vrank = VRANK(bmtree->tree_next[i], request->u.gather.root_rank, request->u.gather.size);
|
||||||
|
int sub_tree_size = get_tree_numdescendants_of(comm, child_vrank) + 1;
|
||||||
|
ptl_size_t local_number_of_fragment = ((sub_tree_size * request->u.gather.packed_size) + mca_coll_portals4_component.ni_limits.max_msg_size -1) / mca_coll_portals4_component.ni_limits.max_msg_size;
|
||||||
|
|
||||||
|
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
|
||||||
|
"%s:%d: %d is child of %d(%d) with %d descendants (nb_frag += %lu)",
|
||||||
|
__FILE__, __LINE__, bmtree->tree_next[i], vrank, request->u.gather.root_rank , sub_tree_size, local_number_of_fragment));
|
||||||
|
number_of_fragment_gathered += local_number_of_fragment;
|
||||||
|
}
|
||||||
|
|
||||||
|
number_of_fragment_send = (request->u.gather.gather_bytes + mca_coll_portals4_component.ni_limits.max_msg_size -1) / mca_coll_portals4_component.ni_limits.max_msg_size;
|
||||||
|
|
||||||
/***********************************************/
|
/***********************************************/
|
||||||
/* Chain the RTR and Recv-ACK to the Gather CT */
|
/* Chain the RTR and Recv-ACK to the Gather CT */
|
||||||
/***********************************************/
|
/***********************************************/
|
||||||
@ -603,7 +639,7 @@ ompi_coll_portals4_gather_intra_binomial_top(const void *sbuf, int scount, struc
|
|||||||
if (vrank == 0) {
|
if (vrank == 0) {
|
||||||
/* root, so do nothing */
|
/* root, so do nothing */
|
||||||
|
|
||||||
expected_ops=bmtree->tree_nextsize; /* gather put from each child */
|
expected_ops=number_of_fragment_gathered ; /* gather put from each child */
|
||||||
expected_acks=0;
|
expected_acks=0;
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
@ -617,22 +653,32 @@ ompi_coll_portals4_gather_intra_binomial_top(const void *sbuf, int scount, struc
|
|||||||
__FILE__, __LINE__, vrank,
|
__FILE__, __LINE__, vrank,
|
||||||
remote_offset, vrank, vparent, request->u.gather.packed_size);
|
remote_offset, vrank, vparent, request->u.gather.packed_size);
|
||||||
|
|
||||||
expected_ops=bmtree->tree_nextsize + 1; /* gather put from each child + a chained RTR */
|
expected_ops=number_of_fragment_gathered + 1; /* gather puts from each child + a chained RTR */
|
||||||
expected_acks=1; /* Recv-ACK from parent */
|
expected_acks=1; /* Recv-ACK from parent */
|
||||||
|
|
||||||
|
ptl_size_t size_sent = 0;
|
||||||
|
ptl_size_t size_left = request->u.gather.gather_bytes;
|
||||||
|
|
||||||
|
for (ptl_size_t i = 0 ; i < number_of_fragment_send; i++) {
|
||||||
|
ptl_size_t frag_size = (size_left > mca_coll_portals4_component.ni_limits.max_msg_size) ?
|
||||||
|
mca_coll_portals4_component.ni_limits.max_msg_size:
|
||||||
|
size_left;
|
||||||
ret = PtlTriggeredPut(request->u.gather.gather_mdh,
|
ret = PtlTriggeredPut(request->u.gather.gather_mdh,
|
||||||
request->u.gather.gather_offset,
|
request->u.gather.gather_offset + size_sent,
|
||||||
request->u.gather.gather_bytes,
|
frag_size,
|
||||||
PTL_NO_ACK_REQ,
|
PTL_NO_ACK_REQ,
|
||||||
ompi_coll_portals4_get_peer(comm, parent),
|
ompi_coll_portals4_get_peer(comm, parent),
|
||||||
mca_coll_portals4_component.pt_idx,
|
mca_coll_portals4_component.pt_idx,
|
||||||
request->u.gather.gather_match_bits,
|
request->u.gather.gather_match_bits,
|
||||||
remote_offset,
|
remote_offset + size_sent,
|
||||||
NULL,
|
NULL,
|
||||||
0,
|
0,
|
||||||
request->u.gather.gather_cth,
|
request->u.gather.gather_cth,
|
||||||
expected_ops);
|
expected_ops);
|
||||||
if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; }
|
if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; }
|
||||||
|
size_left -= frag_size;
|
||||||
|
size_sent += frag_size;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/************************************/
|
/************************************/
|
||||||
@ -734,7 +780,7 @@ ompi_coll_portals4_gather_intra_binomial_top(const void *sbuf, int scount, struc
|
|||||||
|
|
||||||
ompi_coll_portals4_destroy_tree(&(portals4_module->cached_in_order_bmtree));
|
ompi_coll_portals4_destroy_tree(&(portals4_module->cached_in_order_bmtree));
|
||||||
|
|
||||||
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
|
||||||
"coll:portals4:gather_intra_binomial_top exit rank %d", request->u.gather.my_rank));
|
"coll:portals4:gather_intra_binomial_top exit rank %d", request->u.gather.my_rank));
|
||||||
|
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
@ -773,8 +819,9 @@ ompi_coll_portals4_gather_intra_linear_top(const void *sbuf, int scount, struct
|
|||||||
int32_t expected_ops =0;
|
int32_t expected_ops =0;
|
||||||
int32_t expected_acks=0;
|
int32_t expected_acks=0;
|
||||||
|
|
||||||
|
ptl_size_t number_of_fragment = 1;
|
||||||
|
|
||||||
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
|
||||||
"coll:portals4:gather_intra_linear_top enter rank %d", request->u.gather.my_rank));
|
"coll:portals4:gather_intra_linear_top enter rank %d", request->u.gather.my_rank));
|
||||||
|
|
||||||
request->type = OMPI_COLL_PORTALS4_TYPE_GATHER;
|
request->type = OMPI_COLL_PORTALS4_TYPE_GATHER;
|
||||||
@ -843,6 +890,13 @@ ompi_coll_portals4_gather_intra_linear_top(const void *sbuf, int scount, struct
|
|||||||
ret = setup_sync_handles(comm, request, portals4_module);
|
ret = setup_sync_handles(comm, request, portals4_module);
|
||||||
if (MPI_SUCCESS != ret) { line = __LINE__; goto err_hdlr; }
|
if (MPI_SUCCESS != ret) { line = __LINE__; goto err_hdlr; }
|
||||||
|
|
||||||
|
number_of_fragment = (request->u.gather.packed_size > mca_coll_portals4_component.ni_limits.max_msg_size) ?
|
||||||
|
(request->u.gather.packed_size + mca_coll_portals4_component.ni_limits.max_msg_size - 1) / mca_coll_portals4_component.ni_limits.max_msg_size :
|
||||||
|
1;
|
||||||
|
opal_output_verbose(90, ompi_coll_base_framework.framework_output,
|
||||||
|
"%s:%d:rank %d:number_of_fragment = %lu",
|
||||||
|
__FILE__, __LINE__, request->u.gather.my_rank, number_of_fragment);
|
||||||
|
|
||||||
/***********************************************/
|
/***********************************************/
|
||||||
/* Chain the RTR and Recv-ACK to the Gather CT */
|
/* Chain the RTR and Recv-ACK to the Gather CT */
|
||||||
/***********************************************/
|
/***********************************************/
|
||||||
@ -867,11 +921,13 @@ ompi_coll_portals4_gather_intra_linear_top(const void *sbuf, int scount, struct
|
|||||||
if (i_am_root) {
|
if (i_am_root) {
|
||||||
/* root, so do nothing */
|
/* root, so do nothing */
|
||||||
|
|
||||||
expected_ops=request->u.gather.size-1; /* gather put from all other ranks */
|
expected_ops=(request->u.gather.size-1) * number_of_fragment; /* gather put from all other ranks */
|
||||||
expected_acks=0;
|
expected_acks=0;
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
ptl_size_t remote_offset=request->u.gather.my_rank * request->u.gather.packed_size;
|
ptl_size_t remote_offset=request->u.gather.my_rank * request->u.gather.packed_size;
|
||||||
|
ptl_size_t split_offset = 0;
|
||||||
|
ptl_size_t size_left = request->u.gather.gather_bytes;
|
||||||
|
|
||||||
opal_output_verbose(30, ompi_coll_base_framework.framework_output,
|
opal_output_verbose(30, ompi_coll_base_framework.framework_output,
|
||||||
"%s:%d:rank(%d): remote_offset(%lu)=rank(%d) * packed_size(%ld)",
|
"%s:%d:rank(%d): remote_offset(%lu)=rank(%d) * packed_size(%ld)",
|
||||||
@ -881,19 +937,34 @@ ompi_coll_portals4_gather_intra_linear_top(const void *sbuf, int scount, struct
|
|||||||
expected_ops=1; /* chained RTR */
|
expected_ops=1; /* chained RTR */
|
||||||
expected_acks=1; /* Recv-ACK from root */
|
expected_acks=1; /* Recv-ACK from root */
|
||||||
|
|
||||||
|
for (ptl_size_t j=0; j<number_of_fragment; j++) {
|
||||||
|
|
||||||
|
ptl_size_t frag_size = (size_left > mca_coll_portals4_component.ni_limits.max_msg_size) ?
|
||||||
|
mca_coll_portals4_component.ni_limits.max_msg_size :
|
||||||
|
size_left;
|
||||||
|
|
||||||
|
opal_output_verbose(10, ompi_coll_base_framework.framework_output,
|
||||||
|
"%s:%d:rank(%d): frag(%lu),offset_frag (%lu) frag_size(%lu)",
|
||||||
|
__FILE__, __LINE__, request->u.gather.my_rank,
|
||||||
|
j, split_offset, frag_size);
|
||||||
|
|
||||||
ret = PtlTriggeredPut(request->u.gather.gather_mdh,
|
ret = PtlTriggeredPut(request->u.gather.gather_mdh,
|
||||||
request->u.gather.gather_offset,
|
request->u.gather.gather_offset + split_offset,
|
||||||
request->u.gather.gather_bytes,
|
frag_size,
|
||||||
PTL_NO_ACK_REQ,
|
PTL_NO_ACK_REQ,
|
||||||
ompi_coll_portals4_get_peer(comm, request->u.gather.root_rank),
|
ompi_coll_portals4_get_peer(comm, request->u.gather.root_rank),
|
||||||
mca_coll_portals4_component.pt_idx,
|
mca_coll_portals4_component.pt_idx,
|
||||||
request->u.gather.gather_match_bits,
|
request->u.gather.gather_match_bits,
|
||||||
remote_offset,
|
remote_offset + split_offset,
|
||||||
NULL,
|
NULL,
|
||||||
0,
|
0,
|
||||||
request->u.gather.gather_cth,
|
request->u.gather.gather_cth,
|
||||||
expected_ops);
|
expected_ops);
|
||||||
if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; }
|
if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; }
|
||||||
|
|
||||||
|
size_left -= frag_size;
|
||||||
|
split_offset += frag_size;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*****************************************/
|
/*****************************************/
|
||||||
@ -997,7 +1068,7 @@ ompi_coll_portals4_gather_intra_linear_top(const void *sbuf, int scount, struct
|
|||||||
"completed CTWait(expected_ops=%d)\n", expected_ops);
|
"completed CTWait(expected_ops=%d)\n", expected_ops);
|
||||||
}
|
}
|
||||||
|
|
||||||
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
|
||||||
"coll:portals4:gather_intra_linear_top exit rank %d", request->u.gather.my_rank));
|
"coll:portals4:gather_intra_linear_top exit rank %d", request->u.gather.my_rank));
|
||||||
|
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
@ -1020,7 +1091,7 @@ ompi_coll_portals4_gather_intra_binomial_bottom(struct ompi_communicator_t *comm
|
|||||||
int ret, line;
|
int ret, line;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
|
||||||
"coll:portals4:gather_intra_binomial_bottom enter rank %d", request->u.gather.my_rank));
|
"coll:portals4:gather_intra_binomial_bottom enter rank %d", request->u.gather.my_rank));
|
||||||
|
|
||||||
ret = cleanup_gather_handles(request);
|
ret = cleanup_gather_handles(request);
|
||||||
@ -1065,7 +1136,7 @@ ompi_coll_portals4_gather_intra_binomial_bottom(struct ompi_communicator_t *comm
|
|||||||
ompi_request_complete(&request->super, true);
|
ompi_request_complete(&request->super, true);
|
||||||
OPAL_THREAD_UNLOCK(&ompi_request_lock);
|
OPAL_THREAD_UNLOCK(&ompi_request_lock);
|
||||||
|
|
||||||
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
|
||||||
"coll:portals4:gather_intra_binomial_bottom exit rank %d", request->u.gather.my_rank));
|
"coll:portals4:gather_intra_binomial_bottom exit rank %d", request->u.gather.my_rank));
|
||||||
|
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
@ -1090,7 +1161,7 @@ ompi_coll_portals4_gather_intra_linear_bottom(struct ompi_communicator_t *comm,
|
|||||||
int ret, line;
|
int ret, line;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
|
||||||
"coll:portals4:gather_intra_linear_bottom enter rank %d", request->u.gather.my_rank));
|
"coll:portals4:gather_intra_linear_bottom enter rank %d", request->u.gather.my_rank));
|
||||||
|
|
||||||
ret = cleanup_gather_handles(request);
|
ret = cleanup_gather_handles(request);
|
||||||
@ -1128,7 +1199,7 @@ ompi_coll_portals4_gather_intra_linear_bottom(struct ompi_communicator_t *comm,
|
|||||||
ompi_request_complete(&request->super, true);
|
ompi_request_complete(&request->super, true);
|
||||||
OPAL_THREAD_UNLOCK(&ompi_request_lock);
|
OPAL_THREAD_UNLOCK(&ompi_request_lock);
|
||||||
|
|
||||||
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
|
||||||
"coll:portals4:gather_intra_linear_bottom exit rank %d", request->u.gather.my_rank));
|
"coll:portals4:gather_intra_linear_bottom exit rank %d", request->u.gather.my_rank));
|
||||||
|
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
@ -1157,7 +1228,7 @@ ompi_coll_portals4_gather_intra(const void *sbuf, int scount, struct ompi_dataty
|
|||||||
|
|
||||||
ompi_coll_portals4_request_t *request;
|
ompi_coll_portals4_request_t *request;
|
||||||
|
|
||||||
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
|
||||||
"coll:portals4:gather_intra enter rank %d", ompi_comm_rank(comm)));
|
"coll:portals4:gather_intra enter rank %d", ompi_comm_rank(comm)));
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -1204,7 +1275,7 @@ ompi_coll_portals4_gather_intra(const void *sbuf, int scount, struct ompi_dataty
|
|||||||
*/
|
*/
|
||||||
OMPI_COLL_PORTALS4_REQUEST_RETURN(request);
|
OMPI_COLL_PORTALS4_REQUEST_RETURN(request);
|
||||||
|
|
||||||
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
|
||||||
"coll:portals4:gather_intra exit rank %d", request->u.gather.my_rank));
|
"coll:portals4:gather_intra exit rank %d", request->u.gather.my_rank));
|
||||||
|
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
@ -1230,7 +1301,7 @@ ompi_coll_portals4_igather_intra(const void *sbuf, int scount, struct ompi_datat
|
|||||||
|
|
||||||
ompi_coll_portals4_request_t *request;
|
ompi_coll_portals4_request_t *request;
|
||||||
|
|
||||||
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
|
||||||
"coll:portals4:igather_intra enter rank %d", ompi_comm_rank(comm)));
|
"coll:portals4:igather_intra enter rank %d", ompi_comm_rank(comm)));
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -1267,7 +1338,7 @@ ompi_coll_portals4_igather_intra(const void *sbuf, int scount, struct ompi_datat
|
|||||||
if (MPI_SUCCESS != ret) { line = __LINE__; goto err_hdlr; }
|
if (MPI_SUCCESS != ret) { line = __LINE__; goto err_hdlr; }
|
||||||
}
|
}
|
||||||
|
|
||||||
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
|
||||||
"coll:portals4:igather_intra exit rank %d", request->u.gather.my_rank));
|
"coll:portals4:igather_intra exit rank %d", request->u.gather.my_rank));
|
||||||
|
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
@ -1286,7 +1357,7 @@ ompi_coll_portals4_igather_intra_fini(ompi_coll_portals4_request_t *request)
|
|||||||
{
|
{
|
||||||
int ret, line;
|
int ret, line;
|
||||||
|
|
||||||
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
|
||||||
"coll:portals4:igather_intra_fini enter rank %d", request->u.gather.my_rank));
|
"coll:portals4:igather_intra_fini enter rank %d", request->u.gather.my_rank));
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -1300,7 +1371,7 @@ ompi_coll_portals4_igather_intra_fini(ompi_coll_portals4_request_t *request)
|
|||||||
if (MPI_SUCCESS != ret) { line = __LINE__; goto err_hdlr; }
|
if (MPI_SUCCESS != ret) { line = __LINE__; goto err_hdlr; }
|
||||||
}
|
}
|
||||||
|
|
||||||
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
|
||||||
"coll:portals4:igather_intra_fini exit rank %d", request->u.gather.my_rank));
|
"coll:portals4:igather_intra_fini exit rank %d", request->u.gather.my_rank));
|
||||||
|
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
|
@ -127,7 +127,7 @@ setup_scatter_handles(struct ompi_communicator_t *comm,
|
|||||||
|
|
||||||
ptl_me_t me;
|
ptl_me_t me;
|
||||||
|
|
||||||
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
|
||||||
"coll:portals4:setup_scatter_handles enter rank %d", request->u.scatter.my_rank));
|
"coll:portals4:setup_scatter_handles enter rank %d", request->u.scatter.my_rank));
|
||||||
|
|
||||||
/**********************************/
|
/**********************************/
|
||||||
@ -136,7 +136,7 @@ setup_scatter_handles(struct ompi_communicator_t *comm,
|
|||||||
COLL_PORTALS4_SET_BITS(request->u.scatter.scatter_match_bits, ompi_comm_get_cid(comm),
|
COLL_PORTALS4_SET_BITS(request->u.scatter.scatter_match_bits, ompi_comm_get_cid(comm),
|
||||||
0, 0, COLL_PORTALS4_SCATTER, 0, request->u.scatter.coll_count);
|
0, 0, COLL_PORTALS4_SCATTER, 0, request->u.scatter.coll_count);
|
||||||
|
|
||||||
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
|
||||||
"coll:portals4:setup_scatter_handles rank(%d) scatter_match_bits(0x%016lX)",
|
"coll:portals4:setup_scatter_handles rank(%d) scatter_match_bits(0x%016lX)",
|
||||||
request->u.scatter.my_rank, request->u.scatter.scatter_match_bits));
|
request->u.scatter.my_rank, request->u.scatter.scatter_match_bits));
|
||||||
|
|
||||||
@ -166,7 +166,7 @@ setup_scatter_handles(struct ompi_communicator_t *comm,
|
|||||||
&request->u.scatter.scatter_meh);
|
&request->u.scatter.scatter_meh);
|
||||||
if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; }
|
if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; }
|
||||||
|
|
||||||
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
|
||||||
"coll:portals4:setup_scatter_handles exit rank %d", request->u.scatter.my_rank));
|
"coll:portals4:setup_scatter_handles exit rank %d", request->u.scatter.my_rank));
|
||||||
|
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
@ -188,7 +188,7 @@ setup_sync_handles(struct ompi_communicator_t *comm,
|
|||||||
|
|
||||||
ptl_me_t me;
|
ptl_me_t me;
|
||||||
|
|
||||||
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
|
||||||
"coll:portals4:setup_sync_handles enter rank %d", request->u.scatter.my_rank));
|
"coll:portals4:setup_sync_handles enter rank %d", request->u.scatter.my_rank));
|
||||||
|
|
||||||
/**********************************/
|
/**********************************/
|
||||||
@ -197,7 +197,7 @@ setup_sync_handles(struct ompi_communicator_t *comm,
|
|||||||
COLL_PORTALS4_SET_BITS(request->u.scatter.sync_match_bits, ompi_comm_get_cid(comm),
|
COLL_PORTALS4_SET_BITS(request->u.scatter.sync_match_bits, ompi_comm_get_cid(comm),
|
||||||
0, 1, COLL_PORTALS4_SCATTER, 0, request->u.scatter.coll_count);
|
0, 1, COLL_PORTALS4_SCATTER, 0, request->u.scatter.coll_count);
|
||||||
|
|
||||||
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
|
||||||
"coll:portals4:setup_sync_handles rank(%d) sync_match_bits(0x%016lX)",
|
"coll:portals4:setup_sync_handles rank(%d) sync_match_bits(0x%016lX)",
|
||||||
request->u.scatter.my_rank, request->u.scatter.sync_match_bits));
|
request->u.scatter.my_rank, request->u.scatter.sync_match_bits));
|
||||||
|
|
||||||
@ -227,7 +227,7 @@ setup_sync_handles(struct ompi_communicator_t *comm,
|
|||||||
&request->u.scatter.sync_meh);
|
&request->u.scatter.sync_meh);
|
||||||
if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; }
|
if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; }
|
||||||
|
|
||||||
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
|
||||||
"coll:portals4:setup_sync_handles exit rank %d", request->u.scatter.my_rank));
|
"coll:portals4:setup_sync_handles exit rank %d", request->u.scatter.my_rank));
|
||||||
|
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
@ -245,7 +245,7 @@ cleanup_scatter_handles(ompi_coll_portals4_request_t *request)
|
|||||||
{
|
{
|
||||||
int ret, line;
|
int ret, line;
|
||||||
|
|
||||||
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
|
||||||
"coll:portals4:cleanup_scatter_handles enter rank %d", request->u.scatter.my_rank));
|
"coll:portals4:cleanup_scatter_handles enter rank %d", request->u.scatter.my_rank));
|
||||||
|
|
||||||
/**********************************/
|
/**********************************/
|
||||||
@ -265,7 +265,7 @@ cleanup_scatter_handles(ompi_coll_portals4_request_t *request)
|
|||||||
ret = PtlCTFree(request->u.scatter.scatter_cth);
|
ret = PtlCTFree(request->u.scatter.scatter_cth);
|
||||||
if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; }
|
if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; }
|
||||||
|
|
||||||
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
|
||||||
"coll:portals4:cleanup_scatter_handles exit rank %d", request->u.scatter.my_rank));
|
"coll:portals4:cleanup_scatter_handles exit rank %d", request->u.scatter.my_rank));
|
||||||
|
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
@ -284,7 +284,7 @@ cleanup_sync_handles(ompi_coll_portals4_request_t *request)
|
|||||||
int ret, line;
|
int ret, line;
|
||||||
int ptl_ret;
|
int ptl_ret;
|
||||||
|
|
||||||
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
|
||||||
"coll:portals4:cleanup_sync_handles enter rank %d", request->u.scatter.my_rank));
|
"coll:portals4:cleanup_sync_handles enter rank %d", request->u.scatter.my_rank));
|
||||||
|
|
||||||
/**********************************/
|
/**********************************/
|
||||||
@ -304,7 +304,7 @@ cleanup_sync_handles(ompi_coll_portals4_request_t *request)
|
|||||||
ret = PtlCTFree(request->u.scatter.sync_cth);
|
ret = PtlCTFree(request->u.scatter.sync_cth);
|
||||||
if (PTL_OK != ret) { ptl_ret = ret; ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; }
|
if (PTL_OK != ret) { ptl_ret = ret; ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; }
|
||||||
|
|
||||||
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
|
||||||
"coll:portals4:cleanup_sync_handles exit rank %d", request->u.scatter.my_rank));
|
"coll:portals4:cleanup_sync_handles exit rank %d", request->u.scatter.my_rank));
|
||||||
|
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
@ -341,8 +341,9 @@ ompi_coll_portals4_scatter_intra_linear_top(const void *sbuf, int scount, struct
|
|||||||
int32_t expected_chained_rtrs = 0;
|
int32_t expected_chained_rtrs = 0;
|
||||||
int32_t expected_chained_acks = 0;
|
int32_t expected_chained_acks = 0;
|
||||||
|
|
||||||
|
ptl_size_t number_of_fragment = 1;
|
||||||
|
|
||||||
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
|
||||||
"coll:portals4:scatter_intra_linear_top enter rank %d", request->u.scatter.my_rank));
|
"coll:portals4:scatter_intra_linear_top enter rank %d", request->u.scatter.my_rank));
|
||||||
|
|
||||||
request->type = OMPI_COLL_PORTALS4_TYPE_SCATTER;
|
request->type = OMPI_COLL_PORTALS4_TYPE_SCATTER;
|
||||||
@ -409,6 +410,13 @@ ompi_coll_portals4_scatter_intra_linear_top(const void *sbuf, int scount, struct
|
|||||||
ret = setup_sync_handles(comm, request, portals4_module);
|
ret = setup_sync_handles(comm, request, portals4_module);
|
||||||
if (MPI_SUCCESS != ret) { line = __LINE__; goto err_hdlr; }
|
if (MPI_SUCCESS != ret) { line = __LINE__; goto err_hdlr; }
|
||||||
|
|
||||||
|
number_of_fragment = (request->u.scatter.packed_size > mca_coll_portals4_component.ni_limits.max_msg_size) ?
|
||||||
|
(request->u.scatter.packed_size + mca_coll_portals4_component.ni_limits.max_msg_size - 1) / mca_coll_portals4_component.ni_limits.max_msg_size :
|
||||||
|
1;
|
||||||
|
opal_output_verbose(90, ompi_coll_base_framework.framework_output,
|
||||||
|
"%s:%d:rank %d:number_of_fragment = %lu",
|
||||||
|
__FILE__, __LINE__, request->u.scatter.my_rank, number_of_fragment);
|
||||||
|
|
||||||
/**********************************/
|
/**********************************/
|
||||||
/* do the scatter */
|
/* do the scatter */
|
||||||
/**********************************/
|
/**********************************/
|
||||||
@ -445,25 +453,42 @@ ompi_coll_portals4_scatter_intra_linear_top(const void *sbuf, int scount, struct
|
|||||||
}
|
}
|
||||||
|
|
||||||
ptl_size_t offset = request->u.scatter.packed_size * i;
|
ptl_size_t offset = request->u.scatter.packed_size * i;
|
||||||
|
ptl_size_t size_sent = 0;
|
||||||
|
ptl_size_t size_left = request->u.scatter.packed_size;
|
||||||
|
|
||||||
opal_output_verbose(30, ompi_coll_base_framework.framework_output,
|
opal_output_verbose(10, ompi_coll_base_framework.framework_output,
|
||||||
"%s:%d:rank(%d): offset(%lu)=rank(%d) * packed_size(%ld)",
|
"%s:%d:rank(%d): offset(%lu)=rank(%d) * packed_size(%ld)",
|
||||||
__FILE__, __LINE__, request->u.scatter.my_rank,
|
__FILE__, __LINE__, request->u.scatter.my_rank,
|
||||||
offset, i, request->u.scatter.packed_size);
|
offset, i, request->u.scatter.packed_size);
|
||||||
|
|
||||||
|
for (ptl_size_t j=0; j<number_of_fragment; j++) {
|
||||||
|
|
||||||
|
ptl_size_t frag_size = (size_left > mca_coll_portals4_component.ni_limits.max_msg_size) ?
|
||||||
|
mca_coll_portals4_component.ni_limits.max_msg_size :
|
||||||
|
size_left;
|
||||||
|
|
||||||
|
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
|
||||||
|
"%s:%d:rank(%d): frag(%lu),offset_frag (%lu) frag_size(%lu)",
|
||||||
|
__FILE__, __LINE__, request->u.scatter.my_rank,
|
||||||
|
j, size_sent, frag_size));
|
||||||
|
|
||||||
ret = PtlTriggeredPut(request->u.scatter.scatter_mdh,
|
ret = PtlTriggeredPut(request->u.scatter.scatter_mdh,
|
||||||
(ptl_size_t)request->u.scatter.scatter_buf + offset,
|
(ptl_size_t)request->u.scatter.scatter_buf + offset + size_sent,
|
||||||
request->u.scatter.packed_size,
|
frag_size,
|
||||||
PTL_NO_ACK_REQ,
|
PTL_NO_ACK_REQ,
|
||||||
ompi_coll_portals4_get_peer(comm, i),
|
ompi_coll_portals4_get_peer(comm, i),
|
||||||
mca_coll_portals4_component.pt_idx,
|
mca_coll_portals4_component.pt_idx,
|
||||||
request->u.scatter.scatter_match_bits,
|
request->u.scatter.scatter_match_bits,
|
||||||
0,
|
size_sent,
|
||||||
NULL,
|
NULL,
|
||||||
0,
|
0,
|
||||||
request->u.scatter.scatter_cth,
|
request->u.scatter.scatter_cth,
|
||||||
expected_chained_rtrs);
|
expected_chained_rtrs);
|
||||||
if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; }
|
if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; }
|
||||||
|
|
||||||
|
size_left -= frag_size;
|
||||||
|
size_sent += frag_size;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
/* non-root, so do nothing */
|
/* non-root, so do nothing */
|
||||||
@ -473,7 +498,7 @@ ompi_coll_portals4_scatter_intra_linear_top(const void *sbuf, int scount, struct
|
|||||||
expected_acks = 0;
|
expected_acks = 0;
|
||||||
|
|
||||||
/* operations on the scatter counter */
|
/* operations on the scatter counter */
|
||||||
expected_puts = 1; /* scatter put from root */
|
expected_puts = number_of_fragment; /* scatter put from root */
|
||||||
expected_chained_rtrs = 0;
|
expected_chained_rtrs = 0;
|
||||||
expected_chained_acks = 0;
|
expected_chained_acks = 0;
|
||||||
}
|
}
|
||||||
@ -552,7 +577,7 @@ ompi_coll_portals4_scatter_intra_linear_top(const void *sbuf, int scount, struct
|
|||||||
"completed CTWait(expected_ops=%d)\n", expected_ops);
|
"completed CTWait(expected_ops=%d)\n", expected_ops);
|
||||||
}
|
}
|
||||||
|
|
||||||
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
|
||||||
"coll:portals4:scatter_intra_linear_top exit rank %d", request->u.scatter.my_rank));
|
"coll:portals4:scatter_intra_linear_top exit rank %d", request->u.scatter.my_rank));
|
||||||
|
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
@ -574,7 +599,7 @@ ompi_coll_portals4_scatter_intra_linear_bottom(struct ompi_communicator_t *comm,
|
|||||||
{
|
{
|
||||||
int ret, line;
|
int ret, line;
|
||||||
|
|
||||||
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
|
||||||
"coll:portals4:scatter_intra_linear_bottom enter rank %d", request->u.scatter.my_rank));
|
"coll:portals4:scatter_intra_linear_bottom enter rank %d", request->u.scatter.my_rank));
|
||||||
|
|
||||||
ret = cleanup_scatter_handles(request);
|
ret = cleanup_scatter_handles(request);
|
||||||
@ -616,7 +641,7 @@ ompi_coll_portals4_scatter_intra_linear_bottom(struct ompi_communicator_t *comm,
|
|||||||
ompi_request_complete(&request->super, true);
|
ompi_request_complete(&request->super, true);
|
||||||
OPAL_THREAD_UNLOCK(&ompi_request_lock);
|
OPAL_THREAD_UNLOCK(&ompi_request_lock);
|
||||||
|
|
||||||
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
|
||||||
"coll:portals4:scatter_intra_linear_bottom exit rank %d", request->u.scatter.my_rank));
|
"coll:portals4:scatter_intra_linear_bottom exit rank %d", request->u.scatter.my_rank));
|
||||||
|
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
@ -645,7 +670,7 @@ ompi_coll_portals4_scatter_intra(const void *sbuf, int scount, struct ompi_datat
|
|||||||
|
|
||||||
ompi_coll_portals4_request_t *request;
|
ompi_coll_portals4_request_t *request;
|
||||||
|
|
||||||
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
|
||||||
"coll:portals4:scatter_intra enter rank %d", ompi_comm_rank(comm)));
|
"coll:portals4:scatter_intra enter rank %d", ompi_comm_rank(comm)));
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -679,7 +704,7 @@ ompi_coll_portals4_scatter_intra(const void *sbuf, int scount, struct ompi_datat
|
|||||||
*/
|
*/
|
||||||
OMPI_COLL_PORTALS4_REQUEST_RETURN(request);
|
OMPI_COLL_PORTALS4_REQUEST_RETURN(request);
|
||||||
|
|
||||||
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
|
||||||
"coll:portals4:scatter_intra exit rank %d", request->u.scatter.my_rank));
|
"coll:portals4:scatter_intra exit rank %d", request->u.scatter.my_rank));
|
||||||
|
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
@ -705,7 +730,7 @@ ompi_coll_portals4_iscatter_intra(const void *sbuf, int scount, struct ompi_data
|
|||||||
|
|
||||||
ompi_coll_portals4_request_t *request;
|
ompi_coll_portals4_request_t *request;
|
||||||
|
|
||||||
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
|
||||||
"coll:portals4:iscatter_intra enter rank %d", ompi_comm_rank(comm)));
|
"coll:portals4:iscatter_intra enter rank %d", ompi_comm_rank(comm)));
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -732,7 +757,7 @@ ompi_coll_portals4_iscatter_intra(const void *sbuf, int scount, struct ompi_data
|
|||||||
module);
|
module);
|
||||||
if (MPI_SUCCESS != ret) { line = __LINE__; goto err_hdlr; }
|
if (MPI_SUCCESS != ret) { line = __LINE__; goto err_hdlr; }
|
||||||
|
|
||||||
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
|
||||||
"coll:portals4:iscatter_intra exit rank %d", request->u.scatter.my_rank));
|
"coll:portals4:iscatter_intra exit rank %d", request->u.scatter.my_rank));
|
||||||
|
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
@ -751,7 +776,7 @@ ompi_coll_portals4_iscatter_intra_fini(ompi_coll_portals4_request_t *request)
|
|||||||
{
|
{
|
||||||
int ret, line;
|
int ret, line;
|
||||||
|
|
||||||
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
|
||||||
"coll:portals4:iscatter_intra_fini enter rank %d", request->u.scatter.my_rank));
|
"coll:portals4:iscatter_intra_fini enter rank %d", request->u.scatter.my_rank));
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -760,7 +785,7 @@ ompi_coll_portals4_iscatter_intra_fini(ompi_coll_portals4_request_t *request)
|
|||||||
ret = ompi_coll_portals4_scatter_intra_linear_bottom(request->super.req_mpi_object.comm, request);
|
ret = ompi_coll_portals4_scatter_intra_linear_bottom(request->super.req_mpi_object.comm, request);
|
||||||
if (MPI_SUCCESS != ret) { line = __LINE__; goto err_hdlr; }
|
if (MPI_SUCCESS != ret) { line = __LINE__; goto err_hdlr; }
|
||||||
|
|
||||||
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
|
||||||
"coll:portals4:iscatter_intra_fini exit rank %d", request->u.scatter.my_rank));
|
"coll:portals4:iscatter_intra_fini exit rank %d", request->u.scatter.my_rank));
|
||||||
|
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user