Merge pull request #1896 from PDeveze/Patchs-on-coll-portals4
Patches on coll portals4
Этот коммит содержится в:
Коммит
3be6052523
@ -65,6 +65,7 @@ struct mca_coll_portals4_component_t {
|
|||||||
opal_free_list_t requests; /* request free list for the i collectives */
|
opal_free_list_t requests; /* request free list for the i collectives */
|
||||||
|
|
||||||
ptl_ni_limits_t ni_limits;
|
ptl_ni_limits_t ni_limits;
|
||||||
|
ptl_size_t portals_max_msg_size;
|
||||||
|
|
||||||
int use_binomial_gather_algorithm;
|
int use_binomial_gather_algorithm;
|
||||||
|
|
||||||
@ -314,7 +315,7 @@ is_reduce_optimizable(struct ompi_datatype_t *dtype, size_t length, struct ompi_
|
|||||||
}
|
}
|
||||||
|
|
||||||
*ptl_dtype = ompi_coll_portals4_atomic_datatype[dtype->id];
|
*ptl_dtype = ompi_coll_portals4_atomic_datatype[dtype->id];
|
||||||
if (*ptl_dtype == COLL_PORTALS4_NO_DTYPE){
|
if (*ptl_dtype == COLL_PORTALS4_NO_DTYPE) {
|
||||||
opal_output_verbose(50, ompi_coll_base_framework.framework_output,
|
opal_output_verbose(50, ompi_coll_base_framework.framework_output,
|
||||||
"datatype %d not supported\n",
|
"datatype %d not supported\n",
|
||||||
dtype->id);
|
dtype->id);
|
||||||
|
@ -265,7 +265,7 @@ allreduce_kary_tree_top(const void *sendbuf, void *recvbuf, int count,
|
|||||||
ompi_coll_portals4_get_peer(comm, child[i]),
|
ompi_coll_portals4_get_peer(comm, child[i]),
|
||||||
mca_coll_portals4_component.pt_idx,
|
mca_coll_portals4_component.pt_idx,
|
||||||
match_bits_rtr, 0, NULL, 0)) != PTL_OK)
|
match_bits_rtr, 0, NULL, 0)) != PTL_OK)
|
||||||
return opal_stderr("Put RTR failed", __FILE__, __LINE__, ret);
|
return opal_stderr("Put RTR failed %d", __FILE__, __LINE__, ret);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -408,7 +408,7 @@ int ompi_coll_portals4_iallreduce_intra(const void* sendbuf, void* recvbuf, int
|
|||||||
allreduce_kary_tree_top(sendbuf, recvbuf, count,
|
allreduce_kary_tree_top(sendbuf, recvbuf, count,
|
||||||
dtype, op, comm, request, portals4_module);
|
dtype, op, comm, request, portals4_module);
|
||||||
|
|
||||||
puts("iallreduce");
|
opal_output_verbose(10, ompi_coll_base_framework.framework_output, "iallreduce");
|
||||||
return (OMPI_SUCCESS);
|
return (OMPI_SUCCESS);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -147,9 +147,31 @@ barrier_hypercube_top(struct ompi_communicator_t *comm,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (is_sync) {
|
if (is_sync) {
|
||||||
/* Send a put to self when we've received all our messages... */
|
/* Each process has a pending PtlTriggeredPut. To be sure this request will be triggered, we must
|
||||||
ret = PtlCTWait(request->u.barrier.rtr_ct_h, num_msgs, &event);
|
call PtlTriggeredCTInc twice. Otherwise, we could free the CT too early and the Put wouldn't be triggered */
|
||||||
|
|
||||||
|
ptl_ct_event_t ct_inc;
|
||||||
|
|
||||||
|
ct_inc.success = 1;
|
||||||
|
ct_inc.failure = 0;
|
||||||
|
|
||||||
|
if ((ret = PtlTriggeredCTInc(request->u.barrier.rtr_ct_h, ct_inc,
|
||||||
|
request->u.barrier.rtr_ct_h, num_msgs)) != 0) {
|
||||||
|
return opal_stderr("PtlTriggeredCTInc failed", __FILE__, __LINE__, ret);
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((ret = PtlTriggeredCTInc(request->u.barrier.rtr_ct_h, ct_inc,
|
||||||
|
request->u.barrier.rtr_ct_h, num_msgs + 1)) != 0) {
|
||||||
|
return opal_stderr("PtlTriggeredCTInc failed", __FILE__, __LINE__, ret);
|
||||||
|
}
|
||||||
|
|
||||||
|
ret = PtlCTWait(request->u.barrier.rtr_ct_h, num_msgs + 2, &event);
|
||||||
|
if (PTL_OK != ret) {
|
||||||
|
opal_output_verbose(1, ompi_coll_base_framework.framework_output,
|
||||||
|
"%s:%d: PtlCTWait failed: %d\n",
|
||||||
|
__FILE__, __LINE__, ret);
|
||||||
|
return OMPI_ERROR;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
/* Send a put to self when we've received all our messages... */
|
/* Send a put to self when we've received all our messages... */
|
||||||
|
@ -89,12 +89,20 @@ static int prepare_bcast_data (struct ompi_communicator_t *comm,
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Number of segments */
|
/* Number of segments */
|
||||||
request->u.bcast.segment_nb = (request->u.bcast.tmpsize > COLL_PORTALS4_MAX_BW) ?
|
{
|
||||||
(((request->u.bcast.tmpsize + COLL_PORTALS4_MAX_BW -1) / COLL_PORTALS4_MAX_BW) < COLL_PORTALS4_MAX_SEGMENT ?
|
size_t max_msg_size = (COLL_PORTALS4_MAX_BW > mca_coll_portals4_component.ni_limits.max_msg_size) ?
|
||||||
((request->u.bcast.tmpsize + COLL_PORTALS4_MAX_BW -1) / COLL_PORTALS4_MAX_BW) :
|
mca_coll_portals4_component.ni_limits.max_msg_size :
|
||||||
COLL_PORTALS4_MAX_SEGMENT) :
|
COLL_PORTALS4_MAX_BW;
|
||||||
|
|
||||||
|
// TODO: Either make the Portals size limits and COLL_PORTALS4_MAX_SEGMENT compatible, or remove COLL_PORTALS4_MAX_SEGMENT
|
||||||
|
request->u.bcast.segment_nb = (request->u.bcast.tmpsize > max_msg_size) ?
|
||||||
|
(((request->u.bcast.tmpsize + max_msg_size -1) / max_msg_size) < COLL_PORTALS4_MAX_SEGMENT ?
|
||||||
|
((request->u.bcast.tmpsize + max_msg_size -1) / max_msg_size) : COLL_PORTALS4_MAX_SEGMENT) :
|
||||||
1;
|
1;
|
||||||
|
|
||||||
|
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
|
||||||
|
"seg_number=%d , seg_size_max=%lu", request->u.bcast.segment_nb, max_msg_size));
|
||||||
|
}
|
||||||
if (request->u.bcast.segment_nb > COLL_PORTALS4_BCAST_ALGO_THRESHOLD) {
|
if (request->u.bcast.segment_nb > COLL_PORTALS4_BCAST_ALGO_THRESHOLD) {
|
||||||
request->u.bcast.algo = OMPI_COLL_PORTALS4_BCAST_PIPELINE_ALGO;
|
request->u.bcast.algo = OMPI_COLL_PORTALS4_BCAST_PIPELINE_ALGO;
|
||||||
}
|
}
|
||||||
@ -137,9 +145,9 @@ bcast_kary_tree_top(void *buff, int count,
|
|||||||
mca_coll_portals4_module_t *portals4_module)
|
mca_coll_portals4_module_t *portals4_module)
|
||||||
{
|
{
|
||||||
bool is_sync = request->is_sync;
|
bool is_sync = request->is_sync;
|
||||||
int ret, seg;
|
int ret;
|
||||||
unsigned int i;
|
unsigned int i, seg, seg_size, nb_long;
|
||||||
int segment_nb = request->u.bcast.segment_nb;
|
unsigned int segment_nb = request->u.bcast.segment_nb;
|
||||||
unsigned int child_nb;
|
unsigned int child_nb;
|
||||||
int size = ompi_comm_size(comm);
|
int size = ompi_comm_size(comm);
|
||||||
int rank = ompi_comm_rank(comm);
|
int rank = ompi_comm_rank(comm);
|
||||||
@ -201,15 +209,22 @@ bcast_kary_tree_top(void *buff, int count,
|
|||||||
COLL_PORTALS4_SET_BITS(match_bits, ompi_comm_get_cid(comm), 0, 0,
|
COLL_PORTALS4_SET_BITS(match_bits, ompi_comm_get_cid(comm), 0, 0,
|
||||||
COLL_PORTALS4_BCAST, 0, internal_count);
|
COLL_PORTALS4_BCAST, 0, internal_count);
|
||||||
|
|
||||||
|
/* The data will be cut into segment_nb segments.
|
||||||
|
* nb_long segments will have a size of (seg_size + 1)
|
||||||
|
* and (segment_nb - nb_long) segments will have a size of seg_size
|
||||||
|
*/
|
||||||
|
seg_size = request->u.bcast.tmpsize / segment_nb;
|
||||||
|
nb_long = request->u.bcast.tmpsize % segment_nb;
|
||||||
|
opal_output_verbose(10, ompi_coll_base_framework.framework_output, "seg_size=%d nb_long=%d segment_nb=%d", seg_size, nb_long, segment_nb);
|
||||||
|
|
||||||
if (rank != root) {
|
if (rank != root) {
|
||||||
for (seg = 1, offset = 0, length = 0 ;
|
for (seg = 1, offset = 0, length = 0 ;
|
||||||
seg <= segment_nb ;
|
seg <= segment_nb ;
|
||||||
seg++, offset += length) {
|
seg++, offset += length) {
|
||||||
|
|
||||||
/* Divide buffer into segments */
|
/* Divide buffer into segments */
|
||||||
length = (seg < segment_nb) ?
|
if (seg <= nb_long) length = seg_size + 1;
|
||||||
(request->u.bcast.tmpsize + segment_nb - 1) / segment_nb :
|
else length = seg_size;
|
||||||
request->u.bcast.tmpsize - ((request->u.bcast.tmpsize + segment_nb - 1) / segment_nb) * (segment_nb - 1);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
** Prepare Data ME
|
** Prepare Data ME
|
||||||
@ -352,13 +367,14 @@ bcast_kary_tree_top(void *buff, int count,
|
|||||||
seg++, offset += length) {
|
seg++, offset += length) {
|
||||||
|
|
||||||
/* Divide buffer into segments */
|
/* Divide buffer into segments */
|
||||||
length = (seg < segment_nb) ?
|
if (seg <= nb_long) length = seg_size + 1;
|
||||||
(request->u.bcast.tmpsize + segment_nb - 1) / segment_nb :
|
else length = seg_size;
|
||||||
request->u.bcast.tmpsize - ((request->u.bcast.tmpsize + segment_nb - 1) / segment_nb) * (segment_nb - 1);
|
opal_output_verbose(10, ompi_coll_base_framework.framework_output,
|
||||||
|
"bcast with k-ary tree : segment of size %ld", length);
|
||||||
|
|
||||||
/* compute the triggering threshold to send data to the children */
|
/* compute the triggering threshold to send data to the children */
|
||||||
trig_thr = (rank == root) ? (segment_nb) :
|
trig_thr = segment_nb + seg - 1; /* To be sure the set of PtlTriggeredPut of DATA will be executed in order */
|
||||||
(segment_nb + seg);
|
if (rank != root) trig_thr ++;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
** Send Data to children
|
** Send Data to children
|
||||||
@ -381,6 +397,17 @@ bcast_kary_tree_top(void *buff, int count,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (rank == root) {
|
||||||
|
trig_thr = segment_nb;
|
||||||
|
ct_inc.success = segment_nb;
|
||||||
|
ct_inc.failure = 0;
|
||||||
|
|
||||||
|
if ((ret = PtlTriggeredCTInc(request->u.bcast.trig_ct_h, ct_inc,
|
||||||
|
request->u.bcast.trig_ct_h, trig_thr)) != 0) {
|
||||||
|
return opal_stderr("PtlTriggeredCTInc failed", __FILE__, __LINE__, ret);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
ack_thr = child_nb;
|
ack_thr = child_nb;
|
||||||
|
|
||||||
if (is_sync) {
|
if (is_sync) {
|
||||||
@ -409,9 +436,28 @@ bcast_kary_tree_top(void *buff, int count,
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
if (rank != root) {
|
if (rank != root) {
|
||||||
ack_thr = segment_nb;
|
trig_thr = segment_nb;
|
||||||
if (is_sync) {
|
if (is_sync) {
|
||||||
if ((ret = PtlCTWait(request->u.bcast.trig_ct_h, ack_thr, &ct)) != 0) {
|
/* Each leaf has a pending PtlTriggeredPut (to send the final ACK). We must call PtlTriggeredCTInc twice.
|
||||||
|
Otherwise, we could pass the PtlCTWait and then free the CT too early and the Put wouldn't be triggered.
|
||||||
|
|
||||||
|
This is necessary because portals4 does not ensure the order of the triggered operations associated
|
||||||
|
with the same threshold. In the case where PtlCTWait is not called (else case), this is not necessary. */
|
||||||
|
|
||||||
|
ct_inc.success = 1;
|
||||||
|
ct_inc.failure = 0;
|
||||||
|
|
||||||
|
if ((ret = PtlTriggeredCTInc(request->u.bcast.trig_ct_h, ct_inc,
|
||||||
|
request->u.bcast.trig_ct_h, trig_thr)) != 0) {
|
||||||
|
return opal_stderr("PtlTriggeredCTInc failed", __FILE__, __LINE__, ret);
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((ret = PtlTriggeredCTInc(request->u.bcast.trig_ct_h, ct_inc,
|
||||||
|
request->u.bcast.trig_ct_h, trig_thr + 1)) != 0) {
|
||||||
|
return opal_stderr("PtlTriggeredCTInc failed", __FILE__, __LINE__, ret);
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((ret = PtlCTWait(request->u.bcast.trig_ct_h, trig_thr + 2, &ct)) != 0) {
|
||||||
opal_stderr("PtlCTWait failed", __FILE__, __LINE__, ret);
|
opal_stderr("PtlCTWait failed", __FILE__, __LINE__, ret);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -421,7 +467,7 @@ bcast_kary_tree_top(void *buff, int count,
|
|||||||
mca_coll_portals4_component.finish_pt_idx,
|
mca_coll_portals4_component.finish_pt_idx,
|
||||||
0, 0, NULL, (uintptr_t) request,
|
0, 0, NULL, (uintptr_t) request,
|
||||||
request->u.bcast.trig_ct_h,
|
request->u.bcast.trig_ct_h,
|
||||||
ack_thr)) != 0) {
|
trig_thr)) != 0) {
|
||||||
return opal_stderr("PtlTriggeredPut failed", __FILE__, __LINE__, ret);
|
return opal_stderr("PtlTriggeredPut failed", __FILE__, __LINE__, ret);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -440,8 +486,9 @@ bcast_pipeline_top(void *buff, int count,
|
|||||||
mca_coll_portals4_module_t *portals4_module)
|
mca_coll_portals4_module_t *portals4_module)
|
||||||
{
|
{
|
||||||
bool is_sync = request->is_sync;
|
bool is_sync = request->is_sync;
|
||||||
int ret, seg;
|
int ret;
|
||||||
int segment_nb = request->u.bcast.segment_nb;
|
unsigned int seg, seg_size, nb_long;
|
||||||
|
unsigned int segment_nb = request->u.bcast.segment_nb;
|
||||||
int size = ompi_comm_size(comm);
|
int size = ompi_comm_size(comm);
|
||||||
int rank = ompi_comm_rank(comm);
|
int rank = ompi_comm_rank(comm);
|
||||||
ptl_rank_t parent, child;
|
ptl_rank_t parent, child;
|
||||||
@ -492,6 +539,13 @@ bcast_pipeline_top(void *buff, int count,
|
|||||||
|
|
||||||
COLL_PORTALS4_SET_BITS(match_bits, ompi_comm_get_cid(comm), 0, 0,
|
COLL_PORTALS4_SET_BITS(match_bits, ompi_comm_get_cid(comm), 0, 0,
|
||||||
COLL_PORTALS4_BCAST, 0, internal_count);
|
COLL_PORTALS4_BCAST, 0, internal_count);
|
||||||
|
/* The data will be cut into segment_nb segments.
|
||||||
|
* nb_long segments will have a size of (seg_size + 1)
|
||||||
|
* and (segment_nb - nb_long) segments will have a size of seg_size
|
||||||
|
*/
|
||||||
|
seg_size = request->u.bcast.tmpsize / segment_nb;
|
||||||
|
nb_long = request->u.bcast.tmpsize % segment_nb;
|
||||||
|
opal_output_verbose(10, ompi_coll_base_framework.framework_output, "seg_size=%d nb_long=%d", seg_size, nb_long);
|
||||||
|
|
||||||
if (rank != root) {
|
if (rank != root) {
|
||||||
for (seg = 1, offset = 0, length = 0 ;
|
for (seg = 1, offset = 0, length = 0 ;
|
||||||
@ -499,9 +553,8 @@ bcast_pipeline_top(void *buff, int count,
|
|||||||
seg++, offset += length) {
|
seg++, offset += length) {
|
||||||
|
|
||||||
/* Divide buffer into segments */
|
/* Divide buffer into segments */
|
||||||
length = (seg < segment_nb) ?
|
if (seg <= nb_long) length = seg_size + 1;
|
||||||
(request->u.bcast.tmpsize + segment_nb - 1) / segment_nb :
|
else length = seg_size;
|
||||||
request->u.bcast.tmpsize - ((request->u.bcast.tmpsize + segment_nb - 1) / segment_nb) * (segment_nb - 1);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
** Prepare Data ME
|
** Prepare Data ME
|
||||||
@ -642,13 +695,14 @@ bcast_pipeline_top(void *buff, int count,
|
|||||||
seg++, offset += length) {
|
seg++, offset += length) {
|
||||||
|
|
||||||
/* Divide buffer into segments */
|
/* Divide buffer into segments */
|
||||||
length = (seg < segment_nb) ?
|
if (seg <= nb_long) length = seg_size + 1;
|
||||||
(request->u.bcast.tmpsize + segment_nb - 1) / segment_nb :
|
else length = seg_size;
|
||||||
request->u.bcast.tmpsize - ((request->u.bcast.tmpsize + segment_nb - 1) / segment_nb) * (segment_nb - 1);
|
opal_output_verbose(10, ompi_coll_base_framework.framework_output,
|
||||||
|
"bcast with pipeline : segment of size %ld \n", length);
|
||||||
|
|
||||||
/* compute the triggering threshold to send data to the children */
|
/* compute the triggering threshold to send data to the children */
|
||||||
trig_thr = (rank == root) ? (segment_nb) :
|
trig_thr = segment_nb + seg - 1; /* To be sure the PtlTriggeredPut will be executed in order */
|
||||||
(segment_nb + seg);
|
if (rank != root) trig_thr ++;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
** Send Data to children
|
** Send Data to children
|
||||||
@ -668,6 +722,16 @@ bcast_pipeline_top(void *buff, int count,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (rank == root) {
|
||||||
|
trig_thr = segment_nb;
|
||||||
|
ct_inc.success = segment_nb;
|
||||||
|
ct_inc.failure = 0;
|
||||||
|
|
||||||
|
if ((ret = PtlTriggeredCTInc(request->u.bcast.trig_ct_h, ct_inc,
|
||||||
|
request->u.bcast.trig_ct_h, trig_thr)) != 0) {
|
||||||
|
return opal_stderr("PtlTriggeredCTInc failed", __FILE__, __LINE__, ret);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (is_sync) {
|
if (is_sync) {
|
||||||
if ((ret = PtlCTWait(request->u.bcast.ack_ct_h, 1, &ct)) != 0) {
|
if ((ret = PtlCTWait(request->u.bcast.ack_ct_h, 1, &ct)) != 0) {
|
||||||
@ -696,8 +760,29 @@ bcast_pipeline_top(void *buff, int count,
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
if (rank != root) {
|
if (rank != root) {
|
||||||
|
trig_thr = segment_nb;
|
||||||
|
|
||||||
if (is_sync) {
|
if (is_sync) {
|
||||||
if ((ret = PtlCTWait(request->u.bcast.trig_ct_h, segment_nb, &ct)) != 0) {
|
/* Each leaf has a pending PtlTriggeredPut (to send the final ACK). We must call PtlTriggeredCTInc twice.
|
||||||
|
Otherwise, we could pass the PtlCTWait and then free the CT too early and the Put wouldn't be triggered.
|
||||||
|
|
||||||
|
This is necessary because portals4 does not ensure the order of the triggered operations associated
|
||||||
|
with the same threshold. In the case where PtlCTWait is not called (else case), this is not necessary. */
|
||||||
|
|
||||||
|
ct_inc.success = 1;
|
||||||
|
ct_inc.failure = 0;
|
||||||
|
|
||||||
|
if ((ret = PtlTriggeredCTInc(request->u.bcast.trig_ct_h, ct_inc,
|
||||||
|
request->u.bcast.trig_ct_h, trig_thr)) != 0) {
|
||||||
|
return opal_stderr("PtlTriggeredCTInc failed", __FILE__, __LINE__, ret);
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((ret = PtlTriggeredCTInc(request->u.bcast.trig_ct_h, ct_inc,
|
||||||
|
request->u.bcast.trig_ct_h, trig_thr + 1)) != 0) {
|
||||||
|
return opal_stderr("PtlTriggeredCTInc failed", __FILE__, __LINE__, ret);
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((ret = PtlCTWait(request->u.bcast.trig_ct_h, trig_thr + 2, &ct)) != 0) {
|
||||||
opal_stderr("PtlCTWait failed", __FILE__, __LINE__, ret);
|
opal_stderr("PtlCTWait failed", __FILE__, __LINE__, ret);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -707,7 +792,7 @@ bcast_pipeline_top(void *buff, int count,
|
|||||||
mca_coll_portals4_component.finish_pt_idx,
|
mca_coll_portals4_component.finish_pt_idx,
|
||||||
0, 0, NULL, (uintptr_t) request,
|
0, 0, NULL, (uintptr_t) request,
|
||||||
request->u.bcast.trig_ct_h,
|
request->u.bcast.trig_ct_h,
|
||||||
segment_nb)) != 0) {
|
trig_thr)) != 0) {
|
||||||
return opal_stderr("PtlTriggeredPut failed", __FILE__, __LINE__, ret);
|
return opal_stderr("PtlTriggeredPut failed", __FILE__, __LINE__, ret);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -831,7 +916,7 @@ ompi_coll_portals4_ibcast_intra(void *buff, int count,
|
|||||||
return OMPI_ERROR;
|
return OMPI_ERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
puts("ibcast");
|
opal_output_verbose(10, ompi_coll_base_framework.framework_output, "ibcast_intra");
|
||||||
return (OMPI_SUCCESS);
|
return (OMPI_SUCCESS);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -860,5 +945,6 @@ ompi_coll_portals4_ibcast_intra_fini(ompi_coll_portals4_request_t *request)
|
|||||||
ompi_request_complete(&request->super, true);
|
ompi_request_complete(&request->super, true);
|
||||||
OPAL_THREAD_UNLOCK(&ompi_request_lock);
|
OPAL_THREAD_UNLOCK(&ompi_request_lock);
|
||||||
|
|
||||||
|
opal_output_verbose(10, ompi_coll_base_framework.framework_output, "ibcast_intra_fini");
|
||||||
return (OMPI_SUCCESS);
|
return (OMPI_SUCCESS);
|
||||||
}
|
}
|
||||||
|
@ -211,6 +211,16 @@ portals4_register(void)
|
|||||||
MCA_BASE_VAR_SCOPE_READONLY,
|
MCA_BASE_VAR_SCOPE_READONLY,
|
||||||
&mca_coll_portals4_component.use_binomial_gather_algorithm);
|
&mca_coll_portals4_component.use_binomial_gather_algorithm);
|
||||||
|
|
||||||
|
mca_coll_portals4_component.portals_max_msg_size = PTL_SIZE_MAX;
|
||||||
|
(void) mca_base_component_var_register(&mca_coll_portals4_component.super.collm_version,
|
||||||
|
"max_msg_size",
|
||||||
|
"Max size supported by portals4 (above that, a message is cut into messages less than that size)",
|
||||||
|
MCA_BASE_VAR_TYPE_UNSIGNED_LONG,
|
||||||
|
NULL, 0, 0,
|
||||||
|
OPAL_INFO_LVL_9,
|
||||||
|
MCA_BASE_VAR_SCOPE_READONLY,
|
||||||
|
&mca_coll_portals4_component.portals_max_msg_size);
|
||||||
|
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -369,7 +379,13 @@ portals4_init_query(bool enable_progress_threads,
|
|||||||
__FILE__, __LINE__, ret);
|
__FILE__, __LINE__, ret);
|
||||||
return OMPI_ERROR;
|
return OMPI_ERROR;
|
||||||
}
|
}
|
||||||
|
opal_output_verbose(10, ompi_coll_base_framework.framework_output,
|
||||||
|
"ni_limits.max_atomic_size=%ld", mca_coll_portals4_component.ni_limits.max_atomic_size);
|
||||||
|
|
||||||
|
if (mca_coll_portals4_component.portals_max_msg_size < mca_coll_portals4_component.ni_limits.max_msg_size)
|
||||||
|
mca_coll_portals4_component.ni_limits.max_msg_size = mca_coll_portals4_component.portals_max_msg_size;
|
||||||
|
opal_output_verbose(10, ompi_coll_base_framework.framework_output,
|
||||||
|
"ni_limits.max_msg_size=%lu", mca_coll_portals4_component.ni_limits.max_msg_size);
|
||||||
|
|
||||||
ret = PtlGetId(mca_coll_portals4_component.ni_h, &mca_coll_portals4_component.id);
|
ret = PtlGetId(mca_coll_portals4_component.ni_h, &mca_coll_portals4_component.id);
|
||||||
if (PTL_OK != ret) {
|
if (PTL_OK != ret) {
|
||||||
|
@ -21,6 +21,7 @@
|
|||||||
#include "coll_portals4.h"
|
#include "coll_portals4.h"
|
||||||
#include "coll_portals4_request.h"
|
#include "coll_portals4_request.h"
|
||||||
|
|
||||||
|
#include <string.h> // included for ffs in get_tree_numdescendants_of
|
||||||
|
|
||||||
#undef RTR_USES_TRIGGERED_PUT
|
#undef RTR_USES_TRIGGERED_PUT
|
||||||
|
|
||||||
@ -55,6 +56,22 @@
|
|||||||
* |
|
* |
|
||||||
* 15
|
* 15
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
/*
 * Return the number of descendants of virtual rank `vrank` in the tree
 * spanning communicator `comm`.
 *
 * - vrank 0 is treated as the root: every other rank is a descendant,
 *   so the result is (communicator size - 1).
 * - For any other vrank, the candidate subtree span is
 *   1 << ffs(vrank - 1), where ffs() yields the 1-based index of the
 *   least significant set bit (0 when its argument is 0, so vrank 1 gets
 *   a span of 1). The span is clamped to (size - vrank) when
 *   vrank + span would exceed the communicator size, and 1 is subtracted
 *   to exclude vrank itself.
 *
 * NOTE(review): presumably this mirrors the layout produced by
 * ompi_coll_portals4_build_in_order_bmtree -- confirm against that builder.
 */
static int32_t get_tree_numdescendants_of(struct ompi_communicator_t* comm,
|
||||||
|
int vrank)
|
||||||
|
{
|
||||||
|
int max;
|
||||||
|
int size = ompi_comm_size(comm);
|
||||||
|

|
||||||
|
if (0 == vrank) {
|
||||||
|
return size - 1;
|
||||||
|
} else {
|
||||||
|
max = 1 << ffs(vrank - 1);
|
||||||
|
return ((vrank + max <= size ) ? max : size - vrank) -1;
|
||||||
|
}
|
||||||
|

|
||||||
|
}
|
||||||
|
|
||||||
static ompi_coll_portals4_tree_t*
|
static ompi_coll_portals4_tree_t*
|
||||||
ompi_coll_portals4_build_in_order_bmtree( struct ompi_communicator_t* comm,
|
ompi_coll_portals4_build_in_order_bmtree( struct ompi_communicator_t* comm,
|
||||||
int root )
|
int root )
|
||||||
@ -506,8 +523,10 @@ ompi_coll_portals4_gather_intra_binomial_top(const void *sbuf, int scount, struc
|
|||||||
int32_t expected_ops =0;
|
int32_t expected_ops =0;
|
||||||
int32_t expected_acks=0;
|
int32_t expected_acks=0;
|
||||||
|
|
||||||
|
ptl_size_t number_of_fragment_gathered = 0;
|
||||||
|
ptl_size_t number_of_fragment_send = 1;
|
||||||
|
|
||||||
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
|
||||||
"coll:portals4:gather_intra_binomial_top enter rank %d", request->u.gather.my_rank));
|
"coll:portals4:gather_intra_binomial_top enter rank %d", request->u.gather.my_rank));
|
||||||
|
|
||||||
request->type = OMPI_COLL_PORTALS4_TYPE_GATHER;
|
request->type = OMPI_COLL_PORTALS4_TYPE_GATHER;
|
||||||
@ -579,6 +598,23 @@ ompi_coll_portals4_gather_intra_binomial_top(const void *sbuf, int scount, struc
|
|||||||
ret = setup_sync_handles(comm, request, portals4_module);
|
ret = setup_sync_handles(comm, request, portals4_module);
|
||||||
if (MPI_SUCCESS != ret) { line = __LINE__; goto err_hdlr; }
|
if (MPI_SUCCESS != ret) { line = __LINE__; goto err_hdlr; }
|
||||||
|
|
||||||
|
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
|
||||||
|
"%s:%d: packed_size=%lu, fragment_size=%lu",
|
||||||
|
__FILE__, __LINE__, request->u.gather.packed_size, mca_coll_portals4_component.ni_limits.max_msg_size));
|
||||||
|
|
||||||
|
for (int i =0; i < bmtree->tree_nextsize; i++) {
|
||||||
|
int child_vrank = VRANK(bmtree->tree_next[i], request->u.gather.root_rank, request->u.gather.size);
|
||||||
|
int sub_tree_size = get_tree_numdescendants_of(comm, child_vrank) + 1;
|
||||||
|
ptl_size_t local_number_of_fragment = ((sub_tree_size * request->u.gather.packed_size) + mca_coll_portals4_component.ni_limits.max_msg_size -1) / mca_coll_portals4_component.ni_limits.max_msg_size;
|
||||||
|
|
||||||
|
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
|
||||||
|
"%s:%d: %d is child of %d(%d) with %d descendants (nb_frag += %lu)",
|
||||||
|
__FILE__, __LINE__, bmtree->tree_next[i], vrank, request->u.gather.root_rank , sub_tree_size, local_number_of_fragment));
|
||||||
|
number_of_fragment_gathered += local_number_of_fragment;
|
||||||
|
}
|
||||||
|
|
||||||
|
number_of_fragment_send = (request->u.gather.gather_bytes + mca_coll_portals4_component.ni_limits.max_msg_size -1) / mca_coll_portals4_component.ni_limits.max_msg_size;
|
||||||
|
|
||||||
/***********************************************/
|
/***********************************************/
|
||||||
/* Chain the RTR and Recv-ACK to the Gather CT */
|
/* Chain the RTR and Recv-ACK to the Gather CT */
|
||||||
/***********************************************/
|
/***********************************************/
|
||||||
@ -603,7 +639,7 @@ ompi_coll_portals4_gather_intra_binomial_top(const void *sbuf, int scount, struc
|
|||||||
if (vrank == 0) {
|
if (vrank == 0) {
|
||||||
/* root, so do nothing */
|
/* root, so do nothing */
|
||||||
|
|
||||||
expected_ops=bmtree->tree_nextsize; /* gather put from each child */
|
expected_ops=number_of_fragment_gathered ; /* gather put from each child */
|
||||||
expected_acks=0;
|
expected_acks=0;
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
@ -617,22 +653,32 @@ ompi_coll_portals4_gather_intra_binomial_top(const void *sbuf, int scount, struc
|
|||||||
__FILE__, __LINE__, vrank,
|
__FILE__, __LINE__, vrank,
|
||||||
remote_offset, vrank, vparent, request->u.gather.packed_size);
|
remote_offset, vrank, vparent, request->u.gather.packed_size);
|
||||||
|
|
||||||
expected_ops=bmtree->tree_nextsize + 1; /* gather put from each child + a chained RTR */
|
expected_ops=number_of_fragment_gathered + 1; /* gather puts from each child + a chained RTR */
|
||||||
expected_acks=1; /* Recv-ACK from parent */
|
expected_acks=1; /* Recv-ACK from parent */
|
||||||
|
|
||||||
|
ptl_size_t size_sent = 0;
|
||||||
|
ptl_size_t size_left = request->u.gather.gather_bytes;
|
||||||
|
|
||||||
|
for (ptl_size_t i = 0 ; i < number_of_fragment_send; i++) {
|
||||||
|
ptl_size_t frag_size = (size_left > mca_coll_portals4_component.ni_limits.max_msg_size) ?
|
||||||
|
mca_coll_portals4_component.ni_limits.max_msg_size:
|
||||||
|
size_left;
|
||||||
ret = PtlTriggeredPut(request->u.gather.gather_mdh,
|
ret = PtlTriggeredPut(request->u.gather.gather_mdh,
|
||||||
request->u.gather.gather_offset,
|
request->u.gather.gather_offset + size_sent,
|
||||||
request->u.gather.gather_bytes,
|
frag_size,
|
||||||
PTL_NO_ACK_REQ,
|
PTL_NO_ACK_REQ,
|
||||||
ompi_coll_portals4_get_peer(comm, parent),
|
ompi_coll_portals4_get_peer(comm, parent),
|
||||||
mca_coll_portals4_component.pt_idx,
|
mca_coll_portals4_component.pt_idx,
|
||||||
request->u.gather.gather_match_bits,
|
request->u.gather.gather_match_bits,
|
||||||
remote_offset,
|
remote_offset + size_sent,
|
||||||
NULL,
|
NULL,
|
||||||
0,
|
0,
|
||||||
request->u.gather.gather_cth,
|
request->u.gather.gather_cth,
|
||||||
expected_ops);
|
expected_ops);
|
||||||
if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; }
|
if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; }
|
||||||
|
size_left -= frag_size;
|
||||||
|
size_sent += frag_size;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/************************************/
|
/************************************/
|
||||||
@ -734,7 +780,7 @@ ompi_coll_portals4_gather_intra_binomial_top(const void *sbuf, int scount, struc
|
|||||||
|
|
||||||
ompi_coll_portals4_destroy_tree(&(portals4_module->cached_in_order_bmtree));
|
ompi_coll_portals4_destroy_tree(&(portals4_module->cached_in_order_bmtree));
|
||||||
|
|
||||||
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
|
||||||
"coll:portals4:gather_intra_binomial_top exit rank %d", request->u.gather.my_rank));
|
"coll:portals4:gather_intra_binomial_top exit rank %d", request->u.gather.my_rank));
|
||||||
|
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
@ -773,8 +819,9 @@ ompi_coll_portals4_gather_intra_linear_top(const void *sbuf, int scount, struct
|
|||||||
int32_t expected_ops =0;
|
int32_t expected_ops =0;
|
||||||
int32_t expected_acks=0;
|
int32_t expected_acks=0;
|
||||||
|
|
||||||
|
ptl_size_t number_of_fragment = 1;
|
||||||
|
|
||||||
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
|
||||||
"coll:portals4:gather_intra_linear_top enter rank %d", request->u.gather.my_rank));
|
"coll:portals4:gather_intra_linear_top enter rank %d", request->u.gather.my_rank));
|
||||||
|
|
||||||
request->type = OMPI_COLL_PORTALS4_TYPE_GATHER;
|
request->type = OMPI_COLL_PORTALS4_TYPE_GATHER;
|
||||||
@ -843,6 +890,13 @@ ompi_coll_portals4_gather_intra_linear_top(const void *sbuf, int scount, struct
|
|||||||
ret = setup_sync_handles(comm, request, portals4_module);
|
ret = setup_sync_handles(comm, request, portals4_module);
|
||||||
if (MPI_SUCCESS != ret) { line = __LINE__; goto err_hdlr; }
|
if (MPI_SUCCESS != ret) { line = __LINE__; goto err_hdlr; }
|
||||||
|
|
||||||
|
number_of_fragment = (request->u.gather.packed_size > mca_coll_portals4_component.ni_limits.max_msg_size) ?
|
||||||
|
(request->u.gather.packed_size + mca_coll_portals4_component.ni_limits.max_msg_size - 1) / mca_coll_portals4_component.ni_limits.max_msg_size :
|
||||||
|
1;
|
||||||
|
opal_output_verbose(90, ompi_coll_base_framework.framework_output,
|
||||||
|
"%s:%d:rank %d:number_of_fragment = %lu",
|
||||||
|
__FILE__, __LINE__, request->u.gather.my_rank, number_of_fragment);
|
||||||
|
|
||||||
/***********************************************/
|
/***********************************************/
|
||||||
/* Chain the RTR and Recv-ACK to the Gather CT */
|
/* Chain the RTR and Recv-ACK to the Gather CT */
|
||||||
/***********************************************/
|
/***********************************************/
|
||||||
@ -867,11 +921,13 @@ ompi_coll_portals4_gather_intra_linear_top(const void *sbuf, int scount, struct
|
|||||||
if (i_am_root) {
|
if (i_am_root) {
|
||||||
/* root, so do nothing */
|
/* root, so do nothing */
|
||||||
|
|
||||||
expected_ops=request->u.gather.size-1; /* gather put from all other ranks */
|
expected_ops=(request->u.gather.size-1) * number_of_fragment; /* gather put from all other ranks */
|
||||||
expected_acks=0;
|
expected_acks=0;
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
ptl_size_t remote_offset=request->u.gather.my_rank * request->u.gather.packed_size;
|
ptl_size_t remote_offset=request->u.gather.my_rank * request->u.gather.packed_size;
|
||||||
|
ptl_size_t split_offset = 0;
|
||||||
|
ptl_size_t size_left = request->u.gather.gather_bytes;
|
||||||
|
|
||||||
opal_output_verbose(30, ompi_coll_base_framework.framework_output,
|
opal_output_verbose(30, ompi_coll_base_framework.framework_output,
|
||||||
"%s:%d:rank(%d): remote_offset(%lu)=rank(%d) * packed_size(%ld)",
|
"%s:%d:rank(%d): remote_offset(%lu)=rank(%d) * packed_size(%ld)",
|
||||||
@ -881,19 +937,34 @@ ompi_coll_portals4_gather_intra_linear_top(const void *sbuf, int scount, struct
|
|||||||
expected_ops=1; /* chained RTR */
|
expected_ops=1; /* chained RTR */
|
||||||
expected_acks=1; /* Recv-ACK from root */
|
expected_acks=1; /* Recv-ACK from root */
|
||||||
|
|
||||||
|
for (ptl_size_t j=0; j<number_of_fragment; j++) {
|
||||||
|
|
||||||
|
ptl_size_t frag_size = (size_left > mca_coll_portals4_component.ni_limits.max_msg_size) ?
|
||||||
|
mca_coll_portals4_component.ni_limits.max_msg_size :
|
||||||
|
size_left;
|
||||||
|
|
||||||
|
opal_output_verbose(10, ompi_coll_base_framework.framework_output,
|
||||||
|
"%s:%d:rank(%d): frag(%lu),offset_frag (%lu) frag_size(%lu)",
|
||||||
|
__FILE__, __LINE__, request->u.gather.my_rank,
|
||||||
|
j, split_offset, frag_size);
|
||||||
|
|
||||||
ret = PtlTriggeredPut(request->u.gather.gather_mdh,
|
ret = PtlTriggeredPut(request->u.gather.gather_mdh,
|
||||||
request->u.gather.gather_offset,
|
request->u.gather.gather_offset + split_offset,
|
||||||
request->u.gather.gather_bytes,
|
frag_size,
|
||||||
PTL_NO_ACK_REQ,
|
PTL_NO_ACK_REQ,
|
||||||
ompi_coll_portals4_get_peer(comm, request->u.gather.root_rank),
|
ompi_coll_portals4_get_peer(comm, request->u.gather.root_rank),
|
||||||
mca_coll_portals4_component.pt_idx,
|
mca_coll_portals4_component.pt_idx,
|
||||||
request->u.gather.gather_match_bits,
|
request->u.gather.gather_match_bits,
|
||||||
remote_offset,
|
remote_offset + split_offset,
|
||||||
NULL,
|
NULL,
|
||||||
0,
|
0,
|
||||||
request->u.gather.gather_cth,
|
request->u.gather.gather_cth,
|
||||||
expected_ops);
|
expected_ops);
|
||||||
if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; }
|
if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; }
|
||||||
|
|
||||||
|
size_left -= frag_size;
|
||||||
|
split_offset += frag_size;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*****************************************/
|
/*****************************************/
|
||||||
@ -997,7 +1068,7 @@ ompi_coll_portals4_gather_intra_linear_top(const void *sbuf, int scount, struct
|
|||||||
"completed CTWait(expected_ops=%d)\n", expected_ops);
|
"completed CTWait(expected_ops=%d)\n", expected_ops);
|
||||||
}
|
}
|
||||||
|
|
||||||
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
|
||||||
"coll:portals4:gather_intra_linear_top exit rank %d", request->u.gather.my_rank));
|
"coll:portals4:gather_intra_linear_top exit rank %d", request->u.gather.my_rank));
|
||||||
|
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
@ -1020,7 +1091,7 @@ ompi_coll_portals4_gather_intra_binomial_bottom(struct ompi_communicator_t *comm
|
|||||||
int ret, line;
|
int ret, line;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
|
||||||
"coll:portals4:gather_intra_binomial_bottom enter rank %d", request->u.gather.my_rank));
|
"coll:portals4:gather_intra_binomial_bottom enter rank %d", request->u.gather.my_rank));
|
||||||
|
|
||||||
ret = cleanup_gather_handles(request);
|
ret = cleanup_gather_handles(request);
|
||||||
@ -1065,7 +1136,7 @@ ompi_coll_portals4_gather_intra_binomial_bottom(struct ompi_communicator_t *comm
|
|||||||
ompi_request_complete(&request->super, true);
|
ompi_request_complete(&request->super, true);
|
||||||
OPAL_THREAD_UNLOCK(&ompi_request_lock);
|
OPAL_THREAD_UNLOCK(&ompi_request_lock);
|
||||||
|
|
||||||
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
|
||||||
"coll:portals4:gather_intra_binomial_bottom exit rank %d", request->u.gather.my_rank));
|
"coll:portals4:gather_intra_binomial_bottom exit rank %d", request->u.gather.my_rank));
|
||||||
|
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
@ -1090,7 +1161,7 @@ ompi_coll_portals4_gather_intra_linear_bottom(struct ompi_communicator_t *comm,
|
|||||||
int ret, line;
|
int ret, line;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
|
||||||
"coll:portals4:gather_intra_linear_bottom enter rank %d", request->u.gather.my_rank));
|
"coll:portals4:gather_intra_linear_bottom enter rank %d", request->u.gather.my_rank));
|
||||||
|
|
||||||
ret = cleanup_gather_handles(request);
|
ret = cleanup_gather_handles(request);
|
||||||
@ -1128,7 +1199,7 @@ ompi_coll_portals4_gather_intra_linear_bottom(struct ompi_communicator_t *comm,
|
|||||||
ompi_request_complete(&request->super, true);
|
ompi_request_complete(&request->super, true);
|
||||||
OPAL_THREAD_UNLOCK(&ompi_request_lock);
|
OPAL_THREAD_UNLOCK(&ompi_request_lock);
|
||||||
|
|
||||||
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
|
||||||
"coll:portals4:gather_intra_linear_bottom exit rank %d", request->u.gather.my_rank));
|
"coll:portals4:gather_intra_linear_bottom exit rank %d", request->u.gather.my_rank));
|
||||||
|
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
@ -1157,7 +1228,7 @@ ompi_coll_portals4_gather_intra(const void *sbuf, int scount, struct ompi_dataty
|
|||||||
|
|
||||||
ompi_coll_portals4_request_t *request;
|
ompi_coll_portals4_request_t *request;
|
||||||
|
|
||||||
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
|
||||||
"coll:portals4:gather_intra enter rank %d", ompi_comm_rank(comm)));
|
"coll:portals4:gather_intra enter rank %d", ompi_comm_rank(comm)));
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -1204,7 +1275,7 @@ ompi_coll_portals4_gather_intra(const void *sbuf, int scount, struct ompi_dataty
|
|||||||
*/
|
*/
|
||||||
OMPI_COLL_PORTALS4_REQUEST_RETURN(request);
|
OMPI_COLL_PORTALS4_REQUEST_RETURN(request);
|
||||||
|
|
||||||
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
|
||||||
"coll:portals4:gather_intra exit rank %d", request->u.gather.my_rank));
|
"coll:portals4:gather_intra exit rank %d", request->u.gather.my_rank));
|
||||||
|
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
@ -1230,7 +1301,7 @@ ompi_coll_portals4_igather_intra(const void *sbuf, int scount, struct ompi_datat
|
|||||||
|
|
||||||
ompi_coll_portals4_request_t *request;
|
ompi_coll_portals4_request_t *request;
|
||||||
|
|
||||||
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
|
||||||
"coll:portals4:igather_intra enter rank %d", ompi_comm_rank(comm)));
|
"coll:portals4:igather_intra enter rank %d", ompi_comm_rank(comm)));
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -1267,7 +1338,7 @@ ompi_coll_portals4_igather_intra(const void *sbuf, int scount, struct ompi_datat
|
|||||||
if (MPI_SUCCESS != ret) { line = __LINE__; goto err_hdlr; }
|
if (MPI_SUCCESS != ret) { line = __LINE__; goto err_hdlr; }
|
||||||
}
|
}
|
||||||
|
|
||||||
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
|
||||||
"coll:portals4:igather_intra exit rank %d", request->u.gather.my_rank));
|
"coll:portals4:igather_intra exit rank %d", request->u.gather.my_rank));
|
||||||
|
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
@ -1286,7 +1357,7 @@ ompi_coll_portals4_igather_intra_fini(ompi_coll_portals4_request_t *request)
|
|||||||
{
|
{
|
||||||
int ret, line;
|
int ret, line;
|
||||||
|
|
||||||
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
|
||||||
"coll:portals4:igather_intra_fini enter rank %d", request->u.gather.my_rank));
|
"coll:portals4:igather_intra_fini enter rank %d", request->u.gather.my_rank));
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -1300,7 +1371,7 @@ ompi_coll_portals4_igather_intra_fini(ompi_coll_portals4_request_t *request)
|
|||||||
if (MPI_SUCCESS != ret) { line = __LINE__; goto err_hdlr; }
|
if (MPI_SUCCESS != ret) { line = __LINE__; goto err_hdlr; }
|
||||||
}
|
}
|
||||||
|
|
||||||
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
|
||||||
"coll:portals4:igather_intra_fini exit rank %d", request->u.gather.my_rank));
|
"coll:portals4:igather_intra_fini exit rank %d", request->u.gather.my_rank));
|
||||||
|
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
|
@ -428,7 +428,7 @@ ompi_coll_portals4_ireduce_intra(const void* sendbuf, void* recvbuf, int count,
|
|||||||
OMPI_COLL_PORTALS4_REQUEST_RETURN(request);
|
OMPI_COLL_PORTALS4_REQUEST_RETURN(request);
|
||||||
}
|
}
|
||||||
|
|
||||||
puts("ireduce");
|
opal_output_verbose(10, ompi_coll_base_framework.framework_output, "ireduce");
|
||||||
return (OMPI_SUCCESS);
|
return (OMPI_SUCCESS);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -127,7 +127,7 @@ setup_scatter_handles(struct ompi_communicator_t *comm,
|
|||||||
|
|
||||||
ptl_me_t me;
|
ptl_me_t me;
|
||||||
|
|
||||||
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
|
||||||
"coll:portals4:setup_scatter_handles enter rank %d", request->u.scatter.my_rank));
|
"coll:portals4:setup_scatter_handles enter rank %d", request->u.scatter.my_rank));
|
||||||
|
|
||||||
/**********************************/
|
/**********************************/
|
||||||
@ -136,7 +136,7 @@ setup_scatter_handles(struct ompi_communicator_t *comm,
|
|||||||
COLL_PORTALS4_SET_BITS(request->u.scatter.scatter_match_bits, ompi_comm_get_cid(comm),
|
COLL_PORTALS4_SET_BITS(request->u.scatter.scatter_match_bits, ompi_comm_get_cid(comm),
|
||||||
0, 0, COLL_PORTALS4_SCATTER, 0, request->u.scatter.coll_count);
|
0, 0, COLL_PORTALS4_SCATTER, 0, request->u.scatter.coll_count);
|
||||||
|
|
||||||
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
|
||||||
"coll:portals4:setup_scatter_handles rank(%d) scatter_match_bits(0x%016lX)",
|
"coll:portals4:setup_scatter_handles rank(%d) scatter_match_bits(0x%016lX)",
|
||||||
request->u.scatter.my_rank, request->u.scatter.scatter_match_bits));
|
request->u.scatter.my_rank, request->u.scatter.scatter_match_bits));
|
||||||
|
|
||||||
@ -166,7 +166,7 @@ setup_scatter_handles(struct ompi_communicator_t *comm,
|
|||||||
&request->u.scatter.scatter_meh);
|
&request->u.scatter.scatter_meh);
|
||||||
if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; }
|
if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; }
|
||||||
|
|
||||||
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
|
||||||
"coll:portals4:setup_scatter_handles exit rank %d", request->u.scatter.my_rank));
|
"coll:portals4:setup_scatter_handles exit rank %d", request->u.scatter.my_rank));
|
||||||
|
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
@ -188,7 +188,7 @@ setup_sync_handles(struct ompi_communicator_t *comm,
|
|||||||
|
|
||||||
ptl_me_t me;
|
ptl_me_t me;
|
||||||
|
|
||||||
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
|
||||||
"coll:portals4:setup_sync_handles enter rank %d", request->u.scatter.my_rank));
|
"coll:portals4:setup_sync_handles enter rank %d", request->u.scatter.my_rank));
|
||||||
|
|
||||||
/**********************************/
|
/**********************************/
|
||||||
@ -197,7 +197,7 @@ setup_sync_handles(struct ompi_communicator_t *comm,
|
|||||||
COLL_PORTALS4_SET_BITS(request->u.scatter.sync_match_bits, ompi_comm_get_cid(comm),
|
COLL_PORTALS4_SET_BITS(request->u.scatter.sync_match_bits, ompi_comm_get_cid(comm),
|
||||||
0, 1, COLL_PORTALS4_SCATTER, 0, request->u.scatter.coll_count);
|
0, 1, COLL_PORTALS4_SCATTER, 0, request->u.scatter.coll_count);
|
||||||
|
|
||||||
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
|
||||||
"coll:portals4:setup_sync_handles rank(%d) sync_match_bits(0x%016lX)",
|
"coll:portals4:setup_sync_handles rank(%d) sync_match_bits(0x%016lX)",
|
||||||
request->u.scatter.my_rank, request->u.scatter.sync_match_bits));
|
request->u.scatter.my_rank, request->u.scatter.sync_match_bits));
|
||||||
|
|
||||||
@ -227,7 +227,7 @@ setup_sync_handles(struct ompi_communicator_t *comm,
|
|||||||
&request->u.scatter.sync_meh);
|
&request->u.scatter.sync_meh);
|
||||||
if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; }
|
if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; }
|
||||||
|
|
||||||
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
|
||||||
"coll:portals4:setup_sync_handles exit rank %d", request->u.scatter.my_rank));
|
"coll:portals4:setup_sync_handles exit rank %d", request->u.scatter.my_rank));
|
||||||
|
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
@ -245,7 +245,7 @@ cleanup_scatter_handles(ompi_coll_portals4_request_t *request)
|
|||||||
{
|
{
|
||||||
int ret, line;
|
int ret, line;
|
||||||
|
|
||||||
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
|
||||||
"coll:portals4:cleanup_scatter_handles enter rank %d", request->u.scatter.my_rank));
|
"coll:portals4:cleanup_scatter_handles enter rank %d", request->u.scatter.my_rank));
|
||||||
|
|
||||||
/**********************************/
|
/**********************************/
|
||||||
@ -265,7 +265,7 @@ cleanup_scatter_handles(ompi_coll_portals4_request_t *request)
|
|||||||
ret = PtlCTFree(request->u.scatter.scatter_cth);
|
ret = PtlCTFree(request->u.scatter.scatter_cth);
|
||||||
if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; }
|
if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; }
|
||||||
|
|
||||||
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
|
||||||
"coll:portals4:cleanup_scatter_handles exit rank %d", request->u.scatter.my_rank));
|
"coll:portals4:cleanup_scatter_handles exit rank %d", request->u.scatter.my_rank));
|
||||||
|
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
@ -284,7 +284,7 @@ cleanup_sync_handles(ompi_coll_portals4_request_t *request)
|
|||||||
int ret, line;
|
int ret, line;
|
||||||
int ptl_ret;
|
int ptl_ret;
|
||||||
|
|
||||||
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
|
||||||
"coll:portals4:cleanup_sync_handles enter rank %d", request->u.scatter.my_rank));
|
"coll:portals4:cleanup_sync_handles enter rank %d", request->u.scatter.my_rank));
|
||||||
|
|
||||||
/**********************************/
|
/**********************************/
|
||||||
@ -304,7 +304,7 @@ cleanup_sync_handles(ompi_coll_portals4_request_t *request)
|
|||||||
ret = PtlCTFree(request->u.scatter.sync_cth);
|
ret = PtlCTFree(request->u.scatter.sync_cth);
|
||||||
if (PTL_OK != ret) { ptl_ret = ret; ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; }
|
if (PTL_OK != ret) { ptl_ret = ret; ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; }
|
||||||
|
|
||||||
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
|
||||||
"coll:portals4:cleanup_sync_handles exit rank %d", request->u.scatter.my_rank));
|
"coll:portals4:cleanup_sync_handles exit rank %d", request->u.scatter.my_rank));
|
||||||
|
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
@ -341,8 +341,9 @@ ompi_coll_portals4_scatter_intra_linear_top(const void *sbuf, int scount, struct
|
|||||||
int32_t expected_chained_rtrs = 0;
|
int32_t expected_chained_rtrs = 0;
|
||||||
int32_t expected_chained_acks = 0;
|
int32_t expected_chained_acks = 0;
|
||||||
|
|
||||||
|
ptl_size_t number_of_fragment = 1;
|
||||||
|
|
||||||
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
|
||||||
"coll:portals4:scatter_intra_linear_top enter rank %d", request->u.scatter.my_rank));
|
"coll:portals4:scatter_intra_linear_top enter rank %d", request->u.scatter.my_rank));
|
||||||
|
|
||||||
request->type = OMPI_COLL_PORTALS4_TYPE_SCATTER;
|
request->type = OMPI_COLL_PORTALS4_TYPE_SCATTER;
|
||||||
@ -409,6 +410,13 @@ ompi_coll_portals4_scatter_intra_linear_top(const void *sbuf, int scount, struct
|
|||||||
ret = setup_sync_handles(comm, request, portals4_module);
|
ret = setup_sync_handles(comm, request, portals4_module);
|
||||||
if (MPI_SUCCESS != ret) { line = __LINE__; goto err_hdlr; }
|
if (MPI_SUCCESS != ret) { line = __LINE__; goto err_hdlr; }
|
||||||
|
|
||||||
|
number_of_fragment = (request->u.scatter.packed_size > mca_coll_portals4_component.ni_limits.max_msg_size) ?
|
||||||
|
(request->u.scatter.packed_size + mca_coll_portals4_component.ni_limits.max_msg_size - 1) / mca_coll_portals4_component.ni_limits.max_msg_size :
|
||||||
|
1;
|
||||||
|
opal_output_verbose(90, ompi_coll_base_framework.framework_output,
|
||||||
|
"%s:%d:rank %d:number_of_fragment = %lu",
|
||||||
|
__FILE__, __LINE__, request->u.scatter.my_rank, number_of_fragment);
|
||||||
|
|
||||||
/**********************************/
|
/**********************************/
|
||||||
/* do the scatter */
|
/* do the scatter */
|
||||||
/**********************************/
|
/**********************************/
|
||||||
@ -445,25 +453,42 @@ ompi_coll_portals4_scatter_intra_linear_top(const void *sbuf, int scount, struct
|
|||||||
}
|
}
|
||||||
|
|
||||||
ptl_size_t offset = request->u.scatter.packed_size * i;
|
ptl_size_t offset = request->u.scatter.packed_size * i;
|
||||||
|
ptl_size_t size_sent = 0;
|
||||||
|
ptl_size_t size_left = request->u.scatter.packed_size;
|
||||||
|
|
||||||
opal_output_verbose(30, ompi_coll_base_framework.framework_output,
|
opal_output_verbose(10, ompi_coll_base_framework.framework_output,
|
||||||
"%s:%d:rank(%d): offset(%lu)=rank(%d) * packed_size(%ld)",
|
"%s:%d:rank(%d): offset(%lu)=rank(%d) * packed_size(%ld)",
|
||||||
__FILE__, __LINE__, request->u.scatter.my_rank,
|
__FILE__, __LINE__, request->u.scatter.my_rank,
|
||||||
offset, i, request->u.scatter.packed_size);
|
offset, i, request->u.scatter.packed_size);
|
||||||
|
|
||||||
|
for (ptl_size_t j=0; j<number_of_fragment; j++) {
|
||||||
|
|
||||||
|
ptl_size_t frag_size = (size_left > mca_coll_portals4_component.ni_limits.max_msg_size) ?
|
||||||
|
mca_coll_portals4_component.ni_limits.max_msg_size :
|
||||||
|
size_left;
|
||||||
|
|
||||||
|
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
|
||||||
|
"%s:%d:rank(%d): frag(%lu),offset_frag (%lu) frag_size(%lu)",
|
||||||
|
__FILE__, __LINE__, request->u.scatter.my_rank,
|
||||||
|
j, size_sent, frag_size));
|
||||||
|
|
||||||
ret = PtlTriggeredPut(request->u.scatter.scatter_mdh,
|
ret = PtlTriggeredPut(request->u.scatter.scatter_mdh,
|
||||||
(ptl_size_t)request->u.scatter.scatter_buf + offset,
|
(ptl_size_t)request->u.scatter.scatter_buf + offset + size_sent,
|
||||||
request->u.scatter.packed_size,
|
frag_size,
|
||||||
PTL_NO_ACK_REQ,
|
PTL_NO_ACK_REQ,
|
||||||
ompi_coll_portals4_get_peer(comm, i),
|
ompi_coll_portals4_get_peer(comm, i),
|
||||||
mca_coll_portals4_component.pt_idx,
|
mca_coll_portals4_component.pt_idx,
|
||||||
request->u.scatter.scatter_match_bits,
|
request->u.scatter.scatter_match_bits,
|
||||||
0,
|
size_sent,
|
||||||
NULL,
|
NULL,
|
||||||
0,
|
0,
|
||||||
request->u.scatter.scatter_cth,
|
request->u.scatter.scatter_cth,
|
||||||
expected_chained_rtrs);
|
expected_chained_rtrs);
|
||||||
if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; }
|
if (PTL_OK != ret) { ret = OMPI_ERROR; line = __LINE__; goto err_hdlr; }
|
||||||
|
|
||||||
|
size_left -= frag_size;
|
||||||
|
size_sent += frag_size;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
/* non-root, so do nothing */
|
/* non-root, so do nothing */
|
||||||
@ -473,7 +498,7 @@ ompi_coll_portals4_scatter_intra_linear_top(const void *sbuf, int scount, struct
|
|||||||
expected_acks = 0;
|
expected_acks = 0;
|
||||||
|
|
||||||
/* operations on the scatter counter */
|
/* operations on the scatter counter */
|
||||||
expected_puts = 1; /* scatter put from root */
|
expected_puts = number_of_fragment; /* scatter put from root */
|
||||||
expected_chained_rtrs = 0;
|
expected_chained_rtrs = 0;
|
||||||
expected_chained_acks = 0;
|
expected_chained_acks = 0;
|
||||||
}
|
}
|
||||||
@ -552,7 +577,7 @@ ompi_coll_portals4_scatter_intra_linear_top(const void *sbuf, int scount, struct
|
|||||||
"completed CTWait(expected_ops=%d)\n", expected_ops);
|
"completed CTWait(expected_ops=%d)\n", expected_ops);
|
||||||
}
|
}
|
||||||
|
|
||||||
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
|
||||||
"coll:portals4:scatter_intra_linear_top exit rank %d", request->u.scatter.my_rank));
|
"coll:portals4:scatter_intra_linear_top exit rank %d", request->u.scatter.my_rank));
|
||||||
|
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
@ -574,7 +599,7 @@ ompi_coll_portals4_scatter_intra_linear_bottom(struct ompi_communicator_t *comm,
|
|||||||
{
|
{
|
||||||
int ret, line;
|
int ret, line;
|
||||||
|
|
||||||
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
|
||||||
"coll:portals4:scatter_intra_linear_bottom enter rank %d", request->u.scatter.my_rank));
|
"coll:portals4:scatter_intra_linear_bottom enter rank %d", request->u.scatter.my_rank));
|
||||||
|
|
||||||
ret = cleanup_scatter_handles(request);
|
ret = cleanup_scatter_handles(request);
|
||||||
@ -616,7 +641,7 @@ ompi_coll_portals4_scatter_intra_linear_bottom(struct ompi_communicator_t *comm,
|
|||||||
ompi_request_complete(&request->super, true);
|
ompi_request_complete(&request->super, true);
|
||||||
OPAL_THREAD_UNLOCK(&ompi_request_lock);
|
OPAL_THREAD_UNLOCK(&ompi_request_lock);
|
||||||
|
|
||||||
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
|
||||||
"coll:portals4:scatter_intra_linear_bottom exit rank %d", request->u.scatter.my_rank));
|
"coll:portals4:scatter_intra_linear_bottom exit rank %d", request->u.scatter.my_rank));
|
||||||
|
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
@ -645,7 +670,7 @@ ompi_coll_portals4_scatter_intra(const void *sbuf, int scount, struct ompi_datat
|
|||||||
|
|
||||||
ompi_coll_portals4_request_t *request;
|
ompi_coll_portals4_request_t *request;
|
||||||
|
|
||||||
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
|
||||||
"coll:portals4:scatter_intra enter rank %d", ompi_comm_rank(comm)));
|
"coll:portals4:scatter_intra enter rank %d", ompi_comm_rank(comm)));
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -679,7 +704,7 @@ ompi_coll_portals4_scatter_intra(const void *sbuf, int scount, struct ompi_datat
|
|||||||
*/
|
*/
|
||||||
OMPI_COLL_PORTALS4_REQUEST_RETURN(request);
|
OMPI_COLL_PORTALS4_REQUEST_RETURN(request);
|
||||||
|
|
||||||
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
|
||||||
"coll:portals4:scatter_intra exit rank %d", request->u.scatter.my_rank));
|
"coll:portals4:scatter_intra exit rank %d", request->u.scatter.my_rank));
|
||||||
|
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
@ -705,7 +730,7 @@ ompi_coll_portals4_iscatter_intra(const void *sbuf, int scount, struct ompi_data
|
|||||||
|
|
||||||
ompi_coll_portals4_request_t *request;
|
ompi_coll_portals4_request_t *request;
|
||||||
|
|
||||||
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
|
||||||
"coll:portals4:iscatter_intra enter rank %d", ompi_comm_rank(comm)));
|
"coll:portals4:iscatter_intra enter rank %d", ompi_comm_rank(comm)));
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -732,7 +757,7 @@ ompi_coll_portals4_iscatter_intra(const void *sbuf, int scount, struct ompi_data
|
|||||||
module);
|
module);
|
||||||
if (MPI_SUCCESS != ret) { line = __LINE__; goto err_hdlr; }
|
if (MPI_SUCCESS != ret) { line = __LINE__; goto err_hdlr; }
|
||||||
|
|
||||||
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
|
||||||
"coll:portals4:iscatter_intra exit rank %d", request->u.scatter.my_rank));
|
"coll:portals4:iscatter_intra exit rank %d", request->u.scatter.my_rank));
|
||||||
|
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
@ -751,7 +776,7 @@ ompi_coll_portals4_iscatter_intra_fini(ompi_coll_portals4_request_t *request)
|
|||||||
{
|
{
|
||||||
int ret, line;
|
int ret, line;
|
||||||
|
|
||||||
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
|
||||||
"coll:portals4:iscatter_intra_fini enter rank %d", request->u.scatter.my_rank));
|
"coll:portals4:iscatter_intra_fini enter rank %d", request->u.scatter.my_rank));
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -760,7 +785,7 @@ ompi_coll_portals4_iscatter_intra_fini(ompi_coll_portals4_request_t *request)
|
|||||||
ret = ompi_coll_portals4_scatter_intra_linear_bottom(request->super.req_mpi_object.comm, request);
|
ret = ompi_coll_portals4_scatter_intra_linear_bottom(request->super.req_mpi_object.comm, request);
|
||||||
if (MPI_SUCCESS != ret) { line = __LINE__; goto err_hdlr; }
|
if (MPI_SUCCESS != ret) { line = __LINE__; goto err_hdlr; }
|
||||||
|
|
||||||
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
|
OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output,
|
||||||
"coll:portals4:iscatter_intra_fini exit rank %d", request->u.scatter.my_rank));
|
"coll:portals4:iscatter_intra_fini exit rank %d", request->u.scatter.my_rank));
|
||||||
|
|
||||||
return OMPI_SUCCESS;
|
return OMPI_SUCCESS;
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user