1
1

Don't error out if recv_cancel comes back "not found", as this is just a race condition

This commit was SVN r24841.
Этот коммит содержится в:
Ralph Castain 2011-06-30 01:19:50 +00:00
родитель 6e7e7be53e
Коммит 2e1fa3e08e

Просмотреть файл

@@ -27,7 +27,6 @@
#endif /* HAVE_SYS_TIME_H */
#include "opal/util/output.h"
#include "opal/util/opal_sos.h"
#include "opal/dss/dss.h"
#include "orte/mca/errmgr/errmgr.h"
@@ -68,7 +67,7 @@ static void allgather_server_recv(int status, orte_process_name_t* sender,
/* reissue the recv */
rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_ALLGATHER_LIST,
ORTE_RML_NON_PERSISTENT, allgather_server_recv, NULL);
if (rc != ORTE_SUCCESS && OPAL_SOS_GET_ERROR_CODE(rc) != ORTE_ERR_NOT_IMPLEMENTED) {
if (rc != ORTE_SUCCESS && rc != ORTE_ERR_NOT_IMPLEMENTED) {
ORTE_ERROR_LOG(rc);
allgather_failed = true;
}
@@ -148,13 +147,16 @@ int orte_grpcomm_base_allgather_list(opal_list_t *names, opal_buffer_t *sbuf, op
allgather_failed = false;
rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_ALLGATHER_LIST,
ORTE_RML_NON_PERSISTENT, allgather_client_recv, NULL);
if (rc != ORTE_SUCCESS && OPAL_SOS_GET_ERROR_CODE(rc) != ORTE_ERR_NOT_IMPLEMENTED) {
if (rc != ORTE_SUCCESS && rc != ORTE_ERR_NOT_IMPLEMENTED) {
ORTE_ERROR_LOG(rc);
return rc;
}
ORTE_PROGRESSED_WAIT(allgather_failed, allgather_num_recvd, 1);
/* cancel the lingering recv */
orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_ALLGATHER_LIST);
/* if the allgather failed, return an error */
if (allgather_failed) {
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
@@ -209,7 +211,7 @@ int orte_grpcomm_base_allgather_list(opal_list_t *names, opal_buffer_t *sbuf, op
/* post the non-blocking recv */
rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_ALLGATHER_LIST,
ORTE_RML_NON_PERSISTENT, allgather_server_recv, NULL);
if (rc != ORTE_SUCCESS && OPAL_SOS_GET_ERROR_CODE(rc) != ORTE_ERR_NOT_IMPLEMENTED) {
if (rc != ORTE_SUCCESS && rc != ORTE_ERR_NOT_IMPLEMENTED) {
ORTE_ERROR_LOG(rc);
return rc;
}
@@ -217,11 +219,7 @@ int orte_grpcomm_base_allgather_list(opal_list_t *names, opal_buffer_t *sbuf, op
ORTE_PROGRESSED_WAIT(allgather_failed, allgather_num_recvd, num_peers-1);
/* cancel the lingering recv */
if (ORTE_SUCCESS != (rc = orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_ALLGATHER_LIST))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(allgather_buf);
return rc;
}
orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_ALLGATHER_LIST);
OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base.output,
"%s allgather_list: received all data",