Ensure we cancel the lingering recv in the allgather code to avoid having incorrect counters.
Thanks to Damien for spotting the problem. This commit was SVN r22301.
Этот коммит содержится в:
родитель
27cc40e412
Коммит
0ffa4f2f0c
@@ -376,6 +376,9 @@ static int allgather(opal_buffer_t *sbuf, opal_buffer_t *rbuf)
|
||||
|
||||
ORTE_PROGRESSED_WAIT(false, allgather_num_recvd, 1);
|
||||
|
||||
/* cancel the lingering recv */
|
||||
orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_ALLGATHER);
|
||||
|
||||
/* copy payload to the caller's buffer */
|
||||
if (ORTE_SUCCESS != (rc = opal_dss.copy_payload(rbuf, &allgather_buf))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
@@ -404,6 +407,9 @@ static int allgather(opal_buffer_t *sbuf, opal_buffer_t *rbuf)
|
||||
|
||||
ORTE_PROGRESSED_WAIT(false, allgather_num_recvd, num_local_peers);
|
||||
|
||||
/* cancel the lingering recv */
|
||||
orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_ALLGATHER);
|
||||
|
||||
/* take the recv'd data and use one of the base collectives
|
||||
* to exchange it with all other local_rank=0 procs in a scalable
|
||||
* manner - the exact collective will depend upon the number of
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user