2004-11-22 03:37:56 +03:00
|
|
|
/*
|
2005-11-05 22:57:48 +03:00
|
|
|
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
|
|
|
* University Research and Technology
|
|
|
|
* Corporation. All rights reserved.
|
2006-08-23 04:29:35 +04:00
|
|
|
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
2005-11-05 22:57:48 +03:00
|
|
|
* of Tennessee Research Foundation. All rights
|
|
|
|
* reserved.
|
2004-11-28 23:09:25 +03:00
|
|
|
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
|
|
|
* University of Stuttgart. All rights reserved.
|
2005-03-24 15:43:37 +03:00
|
|
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
|
|
|
* All rights reserved.
|
2006-11-22 05:06:52 +03:00
|
|
|
* Copyright (c) 2006 Los Alamos National Security, LLC. All rights
|
|
|
|
* reserved.
|
|
|
|
*
|
2004-11-22 04:38:40 +03:00
|
|
|
* $COPYRIGHT$
|
|
|
|
*
|
|
|
|
* Additional copyrights may follow
|
|
|
|
*
|
2004-11-22 03:37:56 +03:00
|
|
|
* $HEADER$
|
|
|
|
*/
|
|
|
|
|
2005-04-21 18:58:25 +04:00
|
|
|
/**
|
|
|
|
* @file
|
|
|
|
*
|
|
|
|
* Progress engine for Open MPI
|
|
|
|
*/
|
|
|
|
|
2006-11-22 05:06:52 +03:00
|
|
|
#ifndef OPAL_RUNTIME_OPAL_PROGRESS_H
|
|
|
|
#define OPAL_RUNTIME_OPAL_PROGRESS_H
|
|
|
|
|
2009-08-20 15:42:18 +04:00
|
|
|
/*
 * Headers are included before the linkage block is opened, so that they
 * are not themselves wrapped in it and so that BEGIN_C_DECLS is already
 * defined at its point of use.
 * NOTE(review): BEGIN_C_DECLS / END_C_DECLS are presumably provided by
 * opal_config.h -- confirm.
 */
#include "opal_config.h"
#include "opal/threads/mutex.h"

BEGIN_C_DECLS
|
2004-10-28 19:40:46 +04:00
|
|
|
|
2005-03-30 05:40:26 +04:00
|
|
|
/**
|
|
|
|
* Initialize the progress engine
|
|
|
|
*
|
|
|
|
* Initialize the progress engine, including constructing the
|
|
|
|
* proper locks and allocating space for the progress registration
|
|
|
|
* functions. At this point, any function in the progress engine
|
|
|
|
* interface may be called.
|
|
|
|
*/
|
2006-08-23 04:29:35 +04:00
|
|
|
OPAL_DECLSPEC int opal_progress_init(void);
|
2005-02-16 20:42:07 +03:00
|
|
|
|
2005-03-30 05:40:26 +04:00
|
|
|
|
2005-04-14 22:55:53 +04:00
|
|
|
/**
|
2006-11-22 05:06:52 +03:00
|
|
|
* Shut down the progress engine
|
2005-04-21 18:58:25 +04:00
|
|
|
*
|
2006-11-22 05:06:52 +03:00
|
|
|
* Shut down the progress engine. This includes deregistering all
|
|
|
|
* registered callbacks and freeing all resources. After finalize
|
|
|
|
* returns, no calls into the progress interface are allowed.
|
2005-04-14 22:55:53 +04:00
|
|
|
*/
|
2006-11-22 05:06:52 +03:00
|
|
|
OPAL_DECLSPEC int opal_progress_finalize(void);
|
|
|
|
|
2005-04-14 22:55:53 +04:00
|
|
|
|
2005-03-30 05:40:26 +04:00
|
|
|
/**
|
2006-11-22 05:06:52 +03:00
|
|
|
* Progress all pending events
|
2005-04-21 18:58:25 +04:00
|
|
|
*
|
2006-11-22 05:06:52 +03:00
|
|
|
* Progress all pending events. All registered event handlers will be
|
|
|
|
* called every call into opal_progress(). The event library will be
|
|
|
|
* called if opal_progress_event_users is greater than 0 (adjustments
|
|
|
|
* can be made by calling opal_progress_event_users_increment() and
|
|
|
|
* opal_progress_event_users_decrement()) or the time since the last call
|
|
|
|
* into the event library is greater than the progress tick rate (by
|
|
|
|
* default, 10ms).
|
2005-03-30 05:40:26 +04:00
|
|
|
*/
|
2006-11-22 05:06:52 +03:00
|
|
|
OPAL_DECLSPEC void opal_progress(void);
|
2005-03-30 05:40:26 +04:00
|
|
|
|
2006-11-22 05:06:52 +03:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Control how the event library is called
|
2005-03-30 05:40:26 +04:00
|
|
|
*
|
2006-11-22 05:06:52 +03:00
|
|
|
* Adjust the flags argument used to call opal_event_loop() from
|
|
|
|
* opal_progress(). The default argument is OPAL_EVLOOP_ONELOOP,
|
|
|
|
* meaning that the call to opal_event_loop() will block pending
|
|
|
|
* events, but may block for a period of time.
|
|
|
|
*
|
|
|
|
* @param flags One of the valid flags arguments to
|
|
|
|
* opal_event_loop().
|
|
|
|
* @return Previous value of flags used to call
|
|
|
|
* opal_event_loop().
|
2005-03-30 05:40:26 +04:00
|
|
|
*/
|
2006-11-22 05:06:52 +03:00
|
|
|
OPAL_DECLSPEC int opal_progress_set_event_flag(int flags);
|
|
|
|
|
2005-03-30 05:40:26 +04:00
|
|
|
|
|
|
|
/**
|
2006-11-22 05:06:52 +03:00
|
|
|
* Increase the number of users of the event library
|
|
|
|
*
|
|
|
|
* Increase the number of users of the event library. This count is
|
|
|
|
* used by opal_progress to determine if opal_event_loop() should be
|
|
|
|
* called every call to opal_progress() or only after a time has
|
|
|
|
* elapsed since the last call (by default, 10ms). The count defaults
|
|
|
|
* to 0, meaning that opal_progress_event_users_increment() must be
|
|
|
|
* called at least once for the event loop to be called on every entry
|
|
|
|
* to opal_progress().
|
|
|
|
*
|
2005-03-30 05:40:26 +04:00
|
|
|
*/
|
2006-11-22 05:06:52 +03:00
|
|
|
OPAL_DECLSPEC void opal_progress_event_users_increment(void);
|
|
|
|
|
2004-10-28 19:40:46 +04:00
|
|
|
|
2005-03-30 05:40:26 +04:00
|
|
|
/**
|
2006-11-22 05:06:52 +03:00
|
|
|
* Decrease the number of users of the event library
|
|
|
|
*
|
|
|
|
* Decrease the number of users of the event library. This count is
|
|
|
|
* used by opal_progress to determine if opal_event_loop() should be
|
|
|
|
* called every call to opal_progress() or only after a time has
|
|
|
|
* elapsed since the last call (by default, 10ms).
|
2005-03-30 05:40:26 +04:00
|
|
|
*/
|
2006-11-22 05:06:52 +03:00
|
|
|
OPAL_DECLSPEC void opal_progress_event_users_decrement(void);
|
2004-10-28 19:40:46 +04:00
|
|
|
|
2005-03-14 23:57:21 +03:00
|
|
|
|
2005-03-30 05:40:26 +04:00
|
|
|
/**
|
2006-11-22 05:06:52 +03:00
|
|
|
* Set whether opal_progress() should yield when idle
|
|
|
|
*
|
|
|
|
* Set whether opal_progress() should yield the processor (either by
|
|
|
|
* sched_yield() or SwitchToThread()) if no events were progressed
|
|
|
|
* during the progress loop. The return value of the callback
|
|
|
|
* functions is used to determine whether or not yielding is required.
|
|
|
|
* By default, the event loop will yield when the progress function is
|
|
|
|
* idle.
|
|
|
|
*
|
|
|
|
* @param yieldopt Whether to yield when idle.
|
|
|
|
* @return Previous value of the yield_when_idle option.
|
2005-03-30 05:40:26 +04:00
|
|
|
*/
|
2006-11-22 05:06:52 +03:00
|
|
|
OPAL_DECLSPEC bool opal_progress_set_yield_when_idle(bool yieldopt);
|
2005-03-14 23:57:21 +03:00
|
|
|
|
2005-03-30 05:40:26 +04:00
|
|
|
|
|
|
|
/**
|
2006-11-22 05:06:52 +03:00
|
|
|
* Set time between calls into the event library
|
|
|
|
*
|
|
|
|
* Set time between calls into the event library when there are no
|
|
|
|
* users of the event library (set by
|
|
|
|
* opal_progress_event_users_increment() and
|
|
|
|
* opal_progress_event_users_decrement()).
|
|
|
|
*
|
|
|
|
* @param microseconds Time (in microseconds) between calls to the event
|
|
|
|
* library
|
2005-03-30 05:40:26 +04:00
|
|
|
*/
|
2006-11-22 05:06:52 +03:00
|
|
|
OPAL_DECLSPEC void opal_progress_set_event_poll_rate(int microseconds);
|
2005-03-18 06:43:59 +03:00
|
|
|
|
2005-03-30 05:40:26 +04:00
|
|
|
|
2005-04-21 18:58:25 +04:00
|
|
|
/**
|
2006-11-22 05:06:52 +03:00
|
|
|
* Progress callback function typedef
|
|
|
|
*
|
|
|
|
* Prototype for a progress function callback. Progress function
|
|
|
|
* callbacks can be registered with opal_progress_register() and
|
|
|
|
* deregistered with opal_progress_unregister(). It should be noted
|
|
|
|
* that either registering or deregistering a function callback is an
|
|
|
|
* extraordinarily expensive operation and should not be used for
|
|
|
|
* potentially short callback lifetimes.
|
|
|
|
*
|
|
|
|
* @return Number of events progressed during the callback
|
|
|
|
*/
|
|
|
|
typedef int (*opal_progress_callback_t)(void);
|
|
|
|
|
2005-04-21 18:58:25 +04:00
|
|
|
|
|
|
|
/**
|
2006-11-22 05:06:52 +03:00
|
|
|
* Register an event to be progressed
|
|
|
|
*
|
|
|
|
* Register an event to be progressed during calls to opal_progress().
|
|
|
|
* Please read the note in opal_progress_callback_t.
|
|
|
|
*/
|
|
|
|
OPAL_DECLSPEC int opal_progress_register(opal_progress_callback_t cb);
|
2005-04-21 18:58:25 +04:00
|
|
|
|
2006-03-02 03:39:07 +03:00
|
|
|
|
|
|
|
/**
|
2006-11-22 05:06:52 +03:00
|
|
|
* Deregister previously registered event
|
|
|
|
*
|
|
|
|
* Deregister an event to be progressed during calls to opal_progress().
|
|
|
|
* Please read the note in opal_progress_callback_t.
|
2006-03-02 03:39:07 +03:00
|
|
|
*/
|
2006-11-22 05:06:52 +03:00
|
|
|
OPAL_DECLSPEC int opal_progress_unregister(opal_progress_callback_t cb);
|
|
|
|
|
2006-03-02 03:39:07 +03:00
|
|
|
|
2006-08-23 04:29:35 +04:00
|
|
|
OPAL_DECLSPEC extern volatile int32_t opal_progress_thread_count;
|
|
|
|
OPAL_DECLSPEC extern int opal_progress_spin_count;
|
2006-03-02 03:39:07 +03:00
|
|
|
|
|
|
|
static inline bool opal_progress_threads(void)
|
|
|
|
{
|
|
|
|
return (opal_progress_thread_count > 0);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2006-11-22 05:06:52 +03:00
|
|
|
/**
|
|
|
|
* Progress until flag is true or poll iterations completed
|
|
|
|
*/
|
2006-03-02 03:39:07 +03:00
|
|
|
static inline bool opal_progress_spin(volatile bool* complete)
|
|
|
|
{
|
|
|
|
int32_t c;
|
|
|
|
OPAL_THREAD_ADD32(&opal_progress_thread_count,1);
|
|
|
|
for (c = 0; c < opal_progress_spin_count; c++) {
|
|
|
|
if (true == *complete) {
|
|
|
|
OPAL_THREAD_ADD32(&opal_progress_thread_count,-1);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
opal_progress();
|
|
|
|
}
|
|
|
|
OPAL_THREAD_ADD32(&opal_progress_thread_count,-1);
|
2006-03-06 21:14:17 +03:00
|
|
|
return false;
|
2006-03-02 03:39:07 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
|
Fix a potential, albeit perhaps esoteric, race condition that can occur for fast HNP's, slow orteds, and fast apps. Under those conditions, it is possible for the orted to be caught in its original send of contact info back to the HNP, and thus for the progress stack never to recover back to a high level. In those circumstances, the orted can "hang" when trying to exit.
Add a new function to opal_progress that tells us our recursion depth to support that solution.
Yes, I know this sounds picky, but good ol' Jeff managed to make it happen by driving his cluster near to death...
Also ensure that we declare "failed" for the daemon job when daemons fail instead of the application job. This is important so that orte knows that it cannot use xcast to tell daemons to "exit", nor should it expect all daemons to respond. Otherwise, it is possible to hang.
After lots of testing, decide to default (again) to slurm detecting failed orteds. This proved necessary to avoid rather annoying hangs that were difficult to recover from. There are conditions where slurm will fail to launch all daemons (slurm folks are working on it), and yet again, good ol' Jeff managed to find both of them.
Thanks you Jeff! :-/
This commit was SVN r18611.
2008-06-06 23:36:27 +04:00
|
|
|
/**
|
|
|
|
* \internal
|
|
|
|
* Don't use this variable; use the opal_progress_recursion_depth()
|
|
|
|
* function.
|
|
|
|
*/
|
|
|
|
OPAL_DECLSPEC extern
|
2009-05-07 00:11:28 +04:00
|
|
|
#if OPAL_HAVE_THREAD_SUPPORT
|
Fix a potential, albeit perhaps esoteric, race condition that can occur for fast HNP's, slow orteds, and fast apps. Under those conditions, it is possible for the orted to be caught in its original send of contact info back to the HNP, and thus for the progress stack never to recover back to a high level. In those circumstances, the orted can "hang" when trying to exit.
Add a new function to opal_progress that tells us our recursion depth to support that solution.
Yes, I know this sounds picky, but good ol' Jeff managed to make it happen by driving his cluster near to death...
Also ensure that we declare "failed" for the daemon job when daemons fail instead of the application job. This is important so that orte knows that it cannot use xcast to tell daemons to "exit", nor should it expect all daemons to respond. Otherwise, it is possible to hang.
After lots of testing, decide to default (again) to slurm detecting failed orteds. This proved necessary to avoid rather annoying hangs that were difficult to recover from. There are conditions where slurm will fail to launch all daemons (slurm folks are working on it), and yet again, good ol' Jeff managed to find both of them.
Thanks you Jeff! :-/
This commit was SVN r18611.
2008-06-06 23:36:27 +04:00
|
|
|
volatile
|
|
|
|
#endif
|
|
|
|
uint32_t opal_progress_recursion_depth_counter;
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Return the current level of recursion -- 0 means that we are not
|
|
|
|
* under an opal_progress() call at all. 1 means that you're in the
|
|
|
|
* top-level opal_progress() function (i.e., not deep in recursion).
|
|
|
|
* Higher values mean that you're that many levels deep in recursion.
|
|
|
|
*/
|
|
|
|
static inline uint32_t opal_progress_recursion_depth(void)
|
|
|
|
{
|
|
|
|
return opal_progress_recursion_depth_counter;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2009-08-20 15:42:18 +04:00
|
|
|
END_C_DECLS
|
2004-04-06 20:32:40 +04:00
|
|
|
|
|
|
|
#endif
|
|
|
|
|