1
1

Fixes trac:1160. There is still some other problem in the OOB, but we
wanted to commit this to get wider testing.

This commit was SVN r16445.

The following Trac tickets were found above:
  Ticket 1160 --> https://svn.open-mpi.org/trac/ompi/ticket/1160
Этот коммит содержится в:
Jeff Squyres 2007-10-15 15:41:36 +00:00
родитель f16a42947a
Коммит 423f23eb6a
3 изменённых файла: 31 добавление и 13 удалений

Просмотреть файл

@ -9,6 +9,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -21,6 +22,7 @@
#include "opal/threads/condition.h"
#include "opal/threads/mutex.h"
#include "opal/event/event.h"
#include "orte/mca/rml/rml.h"
#include "orte/mca/oob/oob.h"
#include "orte/dss/dss_types.h"
@ -34,6 +36,8 @@ struct orte_rml_oob_module_t {
opal_mutex_t exceptions_lock;
opal_list_t queued_routing_messages;
opal_mutex_t queued_lock;
opal_event_t *timer_event;
struct timeval timeout;
};
typedef struct orte_rml_oob_module_t orte_rml_oob_module_t;

Просмотреть файл

@ -9,6 +9,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -40,7 +41,7 @@ static void rml_oob_recv_route_callback(int status,
int count,
orte_rml_tag_t tag,
void *cbdata);
static void rml_oob_queued_progress(int fd, short event, void *arg);
/**
* component definition
@ -138,6 +139,15 @@ rml_oob_init(int* priority)
OBJ_CONSTRUCT(&orte_rml_oob_module.exceptions_lock, opal_mutex_t);
OBJ_CONSTRUCT(&orte_rml_oob_module.queued_routing_messages, opal_list_t);
OBJ_CONSTRUCT(&orte_rml_oob_module.queued_lock, opal_mutex_t);
/* Set default timeout for queued messages to be 1/2 second */
orte_rml_oob_module.timeout.tv_sec = 0;
orte_rml_oob_module.timeout.tv_usec = 500000;
orte_rml_oob_module.timer_event = malloc(sizeof(opal_event_t));
if (NULL == orte_rml_oob_module.timer_event) {
return NULL;
}
opal_evtimer_set(orte_rml_oob_module.timer_event, rml_oob_queued_progress,
NULL);
orte_rml_oob_module.active_oob = &mca_oob;
orte_rml_oob_module.active_oob->oob_exception_callback =
@ -315,22 +325,18 @@ rml_oob_recv_route_queued_send_callback(int status,
}
static int
rml_oob_queued_progress(void)
static void
rml_oob_queued_progress(int fd, short event, void *arg)
{
orte_rml_oob_queued_msg_t *qmsg;
orte_rml_oob_msg_header_t *hdr;
int real_tag;
int ret;
orte_process_name_t next, origin;
int count = 0;
while (true) {
OPAL_THREAD_LOCK(&orte_rml_oob_module.queued_lock);
qmsg = (orte_rml_oob_queued_msg_t*) opal_list_remove_first(&orte_rml_oob_module.queued_routing_messages);
if (0 == opal_list_get_size(&orte_rml_oob_module.queued_routing_messages)) {
opal_progress_unregister(rml_oob_queued_progress);
}
OPAL_THREAD_UNLOCK(&orte_rml_oob_module.queued_lock);
if (NULL == qmsg) break;
@ -385,7 +391,8 @@ rml_oob_queued_progress(void)
opal_list_append(&orte_rml_oob_module.queued_routing_messages,
&qmsg->super);
if (1 == opal_list_get_size(&orte_rml_oob_module.queued_routing_messages)) {
opal_progress_register(rml_oob_queued_progress);
opal_evtimer_add(orte_rml_oob_module.timer_event,
&orte_rml_oob_module.timeout);
}
OPAL_THREAD_UNLOCK(&orte_rml_oob_module.queued_lock);
} else {
@ -398,11 +405,7 @@ rml_oob_queued_progress(void)
abort();
}
}
count++;
}
return count;
}
static void
@ -495,7 +498,8 @@ rml_oob_recv_route_callback(int status,
opal_list_append(&orte_rml_oob_module.queued_routing_messages,
&qmsg->super);
if (1 == opal_list_get_size(&orte_rml_oob_module.queued_routing_messages)) {
opal_progress_register(rml_oob_queued_progress);
opal_evtimer_add(orte_rml_oob_module.timer_event,
&orte_rml_oob_module.timeout);
}
OPAL_THREAD_UNLOCK(&orte_rml_oob_module.queued_lock);
} else {

Просмотреть файл

@ -502,6 +502,16 @@ int orte_daemon(int argc, char *argv[])
require OOB messages for wireup, etc.). */
opal_progress_set_yield_when_idle(false);
/* Change the default behavior of libevent such that we want to
continually block rather than blocking for the default timeout
and then looping around the progress engine again. There
should be nothing in the orted that cannot block in libevent
until "something" happens (i.e., there's no need to keep
cycling through progress because the only things that should
happen will happen in libevent). This is a minor optimization,
but what the heck... :-) */
opal_progress_set_event_flag(OPAL_EVLOOP_ONCE);
/* if requested, obtain and report a new process name and my uri to the indicated pipe */
if (orted_globals.uri_pipe > 0) {
orte_process_name_t name;