1
1

Move the comment to the right place.

This commit was SVN r14237.
Этот коммит содержится в:
George Bosilca 2007-04-05 20:36:33 +00:00
родитель 5c355d0bea
Коммит 33bf6c6e54

Просмотреть файл

@ -477,8 +477,18 @@ int orte_pls_rsh_launch(orte_jobid_t jobid)
if (mca_pls_rsh_component.debug_daemons && if (mca_pls_rsh_component.debug_daemons &&
mca_pls_rsh_component.num_concurrent < num_nodes) { mca_pls_rsh_component.num_concurrent < num_nodes) {
/* we can't run in this situation, so pretty print the error /**
* and exit * If we are in '--debug-daemons' we keep the ssh connection
* alive for the span of the run. If we use this option
* AND we launch on more than "num_concurrent" machines
* then we will deadlock. No connections are terminated
* until the job is complete, no job is started
* since all the orteds are waiting for all the others
* to come online, and the others are not launched because
* we are waiting on those that have started to terminate
* their ssh tunnels. :(
* As we cannot run in this situation, pretty print the error
* and return an error code.
*/ */
opal_show_help("help-pls-rsh.txt", "deadlock-params", opal_show_help("help-pls-rsh.txt", "deadlock-params",
true, mca_pls_rsh_component.num_concurrent, num_nodes); true, mca_pls_rsh_component.num_concurrent, num_nodes);
@ -1084,16 +1094,9 @@ int orte_pls_rsh_launch(orte_jobid_t jobid)
} else { /* father */ } else { /* father */
OPAL_THREAD_LOCK(&mca_pls_rsh_component.lock); OPAL_THREAD_LOCK(&mca_pls_rsh_component.lock);
/* JJH Bug: /* This situation can lead to a deadlock if '--debug-daemons' is set.
* If we are in '--debug-daemons' we keep the ssh connection * However, the deadlock condition is tested at the beginning of this
* alive for the span of the run. If we use this option * function, so we're quite confident it should not happen here.
* AND we launch on more than "num_concurrent" machines
* then we will deadlock. No connections are terminated
* until the job is complete, no job is started
* since all the orteds are waiting for all the others
* to come online, and the others ore not launched because
* we are waiting on those that have started to terminate
* their ssh tunnels. :(
*/ */
if (mca_pls_rsh_component.num_children++ >= if (mca_pls_rsh_component.num_children++ >=
mca_pls_rsh_component.num_concurrent) { mca_pls_rsh_component.num_concurrent) {