Clear an erroneous error message pending a more complete fix
This commit was SVN r14698.
Этот коммит содержится в:
родитель
a65d1d6acc
Коммит
180c96bb8f
@ -80,9 +80,27 @@ int orte_gpr_replica_remote_notify(orte_process_name_t *recipient,
|
|||||||
|
|
||||||
if (0 > orte_rml.send_buffer_nb(recipient, buffer, ORTE_RML_TAG_GPR_NOTIFY, 0,
|
if (0 > orte_rml.send_buffer_nb(recipient, buffer, ORTE_RML_TAG_GPR_NOTIFY, 0,
|
||||||
orte_gpr_replica_remote_send_cb, NULL)) {
|
orte_gpr_replica_remote_send_cb, NULL)) {
|
||||||
|
#if 0
|
||||||
|
/* temporarily disable this error report
|
||||||
|
* With the new orted-failed-to-start code, we hold a caller in
|
||||||
|
* the rmgr.spawn function until either the app launches or
|
||||||
|
* it fails. Failure is indicated by a subscription to NUM_TERMINATED.
|
||||||
|
* However, that means that a notify_msg is going to get sent to a
|
||||||
|
* remote process during comm_spawn once all procs terminate. Since
|
||||||
|
* that process will have terminated, and the HNP processes the trigger
|
||||||
|
* first, the notify_msg send will fail as the recipient will have
|
||||||
|
* terminated and exited.
|
||||||
|
*
|
||||||
|
* A proper fix will require that we do something different
|
||||||
|
* in rmgr_proxy.spawn so we don't get a callback after the
|
||||||
|
* process is done
|
||||||
|
*/
|
||||||
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
|
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
|
||||||
|
opal_output(0, "send failed to [%ld,%ld,%ld]", ORTE_NAME_ARGS(recipient));
|
||||||
|
orte_dss.dump(0, message, ORTE_GPR_NOTIFY_MSG);
|
||||||
OPAL_THREAD_LOCK(&orte_gpr_replica_globals.mutex);
|
OPAL_THREAD_LOCK(&orte_gpr_replica_globals.mutex);
|
||||||
return ORTE_ERR_COMM_FAILURE;
|
return ORTE_ERR_COMM_FAILURE;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
OPAL_THREAD_LOCK(&orte_gpr_replica_globals.mutex);
|
OPAL_THREAD_LOCK(&orte_gpr_replica_globals.mutex);
|
||||||
|
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user