2004-08-28 05:15:19 +04:00
|
|
|
/*
|
2005-11-05 22:57:48 +03:00
|
|
|
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
|
|
|
* University Research and Technology
|
|
|
|
* Corporation. All rights reserved.
|
|
|
|
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
|
|
|
* of Tennessee Research Foundation. All rights
|
|
|
|
* reserved.
|
2004-11-28 23:09:25 +03:00
|
|
|
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
|
|
|
* University of Stuttgart. All rights reserved.
|
2005-03-24 15:43:37 +03:00
|
|
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
|
|
|
* All rights reserved.
|
2004-11-22 04:38:40 +03:00
|
|
|
* $COPYRIGHT$
|
|
|
|
*
|
|
|
|
* Additional copyrights may follow
|
|
|
|
*
|
2004-08-28 05:15:19 +04:00
|
|
|
* $HEADER$
|
|
|
|
*/
|
|
|
|
|
|
|
|
/** @file **/
|
|
|
|
|
2006-02-12 04:33:29 +03:00
|
|
|
#include "orte_config.h"
|
2005-08-15 22:25:35 +04:00
|
|
|
|
2004-09-10 02:31:08 +04:00
|
|
|
#include <string.h>
|
2005-08-15 22:25:35 +04:00
|
|
|
|
2005-07-04 03:31:27 +04:00
|
|
|
#include "opal/util/output.h"
|
2005-07-04 03:09:55 +04:00
|
|
|
#include "opal/event/event.h"
|
2005-07-04 02:45:48 +04:00
|
|
|
#include "opal/threads/mutex.h"
|
|
|
|
#include "opal/threads/condition.h"
|
2005-08-27 01:03:41 +04:00
|
|
|
#include "orte/util/sys_info.h"
|
2005-08-15 22:25:35 +04:00
|
|
|
#include "orte/runtime/runtime.h"
|
2006-02-12 04:33:29 +03:00
|
|
|
#include "orte/orte_constants.h"
|
2005-08-15 22:25:35 +04:00
|
|
|
#include "orte/util/proc_info.h"
|
|
|
|
#include "orte/mca/ns/ns_types.h"
|
|
|
|
#include "orte/mca/gpr/gpr_types.h"
|
2004-08-28 05:15:19 +04:00
|
|
|
|
|
|
|
|
2005-07-04 02:45:48 +04:00
|
|
|
static opal_mutex_t ompi_rte_mutex;
|
|
|
|
static opal_condition_t ompi_rte_condition;
|
2004-08-28 05:15:19 +04:00
|
|
|
static bool ompi_rte_job_started = false;
|
|
|
|
static bool ompi_rte_job_finished = false;
|
2004-09-23 20:12:45 +04:00
|
|
|
static bool ompi_rte_waiting = false;
|
2004-08-28 05:15:19 +04:00
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Change state as processes register/unregister. Note that we could save
|
|
|
|
* the list of registrations - and use the host/pid for cleanup later.
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
2005-03-14 23:57:21 +03:00
|
|
|
/**
 * Callback: note that all processes have registered.
 *
 * When orte_debug_flag is set, a one-line message carrying this process's
 * name is written via opal_output(). The function then sets
 * ompi_rte_job_started while holding ompi_rte_mutex and, if a waiter has
 * announced itself through ompi_rte_waiting, signals ompi_rte_condition.
 *
 * @param match   notification message (not examined by this function)
 * @param cbdata  caller-supplied callback data (not examined by this function)
 */
void orte_all_procs_registered(orte_gpr_notify_message_t* match, void* cbdata)
{
    bool wake_waiter;

    /* Neither argument is needed; the call itself is the event. */
    (void) match;
    (void) cbdata;

    if (orte_debug_flag) {
        opal_output(0, "[%lu,%lu,%lu] all procs registered",
                    ORTE_NAME_ARGS(orte_process_info.my_name));
    }

    OPAL_THREAD_LOCK(&ompi_rte_mutex);

    /* Publish the state change before deciding whether to wake anyone. */
    ompi_rte_job_started = true;
    wake_waiter = ompi_rte_waiting;
    if (wake_waiter) {
        opal_condition_signal(&ompi_rte_condition);
    }

    OPAL_THREAD_UNLOCK(&ompi_rte_mutex);
}
|
|
|
|
|
|
|
|
|
2005-03-14 23:57:21 +03:00
|
|
|
/**
 * Callback: note that all processes have unregistered.
 *
 * Sets ompi_rte_job_finished while holding ompi_rte_mutex and, if a waiter
 * has announced itself through ompi_rte_waiting, signals ompi_rte_condition.
 *
 * @param match   notification message (not examined by this function)
 * @param cbdata  caller-supplied callback data (not examined by this function)
 */
void orte_all_procs_unregistered(orte_gpr_notify_message_t* match, void* cbdata)
{
    bool wake_waiter;

    /* Neither argument is needed; the call itself is the event. */
    (void) match;
    (void) cbdata;

    OPAL_THREAD_LOCK(&ompi_rte_mutex);

    /* Publish the state change before deciding whether to wake anyone. */
    ompi_rte_job_finished = true;
    wake_waiter = ompi_rte_waiting;
    if (wake_waiter) {
        opal_condition_signal(&ompi_rte_condition);
    }

    OPAL_THREAD_UNLOCK(&ompi_rte_mutex);
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * TSW - This is a temporary solution that only handles graceful
 * shutdown.
 */
|
|
|
|
|
2005-03-14 23:57:21 +03:00
|
|
|
int orte_monitor_procs_registered(void)
|
2004-08-28 05:15:19 +04:00
|
|
|
{
|
|
|
|
struct timeval tv;
|
|
|
|
struct timespec ts;
|
|
|
|
|
2005-07-04 02:45:48 +04:00
|
|
|
OBJ_CONSTRUCT(&ompi_rte_mutex, opal_mutex_t);
|
|
|
|
OBJ_CONSTRUCT(&ompi_rte_condition, opal_condition_t);
|
2004-08-28 05:15:19 +04:00
|
|
|
|
|
|
|
/* block until a timeout occurs or all processes have registered */
|
|
|
|
gettimeofday(&tv, NULL);
|
2004-09-30 01:54:57 +04:00
|
|
|
ts.tv_sec = tv.tv_sec + 1000000;
|
2004-08-28 05:15:19 +04:00
|
|
|
ts.tv_nsec = 0;
|
|
|
|
|
2005-07-04 02:45:48 +04:00
|
|
|
OPAL_THREAD_LOCK(&ompi_rte_mutex);
|
2004-08-28 05:15:19 +04:00
|
|
|
if(ompi_rte_job_started == false) {
|
2004-09-30 01:54:57 +04:00
|
|
|
ompi_rte_waiting = true;
|
2005-07-04 02:45:48 +04:00
|
|
|
opal_condition_timedwait(&ompi_rte_condition, &ompi_rte_mutex, &ts);
|
2004-09-30 01:54:57 +04:00
|
|
|
ompi_rte_waiting = false;
|
2004-08-28 05:15:19 +04:00
|
|
|
if(ompi_rte_job_started == false) {
|
2005-07-04 02:45:48 +04:00
|
|
|
opal_mutex_unlock(&ompi_rte_mutex);
|
2005-08-15 22:25:35 +04:00
|
|
|
return ORTE_ERROR;
|
2004-08-28 05:15:19 +04:00
|
|
|
}
|
|
|
|
}
|
2005-07-04 02:45:48 +04:00
|
|
|
OPAL_THREAD_UNLOCK(&ompi_rte_mutex);
|
2005-08-15 22:25:35 +04:00
|
|
|
return ORTE_SUCCESS;
|
2004-09-23 18:33:28 +04:00
|
|
|
}
|
2004-08-28 05:15:19 +04:00
|
|
|
|
2005-03-14 23:57:21 +03:00
|
|
|
int orte_monitor_procs_unregistered(void)
|
2004-09-23 18:33:28 +04:00
|
|
|
{
|
2005-07-04 02:45:48 +04:00
|
|
|
OPAL_THREAD_LOCK(&ompi_rte_mutex);
|
2004-08-28 05:15:19 +04:00
|
|
|
/* wait for all processes to complete */
|
|
|
|
while(ompi_rte_job_finished == false) {
|
2004-09-23 20:12:45 +04:00
|
|
|
ompi_rte_waiting = true;
|
2005-07-04 02:45:48 +04:00
|
|
|
opal_condition_wait(&ompi_rte_condition, &ompi_rte_mutex);
|
2004-09-30 01:54:57 +04:00
|
|
|
ompi_rte_waiting = false;
|
2004-08-28 05:15:19 +04:00
|
|
|
}
|
2004-09-23 20:12:45 +04:00
|
|
|
|
2005-07-04 02:45:48 +04:00
|
|
|
OPAL_THREAD_UNLOCK(&ompi_rte_mutex);
|
2005-08-15 22:25:35 +04:00
|
|
|
return ORTE_SUCCESS;
|
2004-08-28 05:15:19 +04:00
|
|
|
}
|
|
|
|
|