1
1

Rework the opal_progress interface to better support dynamic processes and,
at the same time, remove some of the MPI-related options from OPAL:

  - provide mechanism to change at runtime whether sched_yield() should 
    be called when the progress engine is idle
  - provide mechanism for changing the rate at which the event engine
    is called when there are "no" users of the event engine (ie, when
    using MPI but not TCP)
  - fix some function names in the progress engine to better match
    their intended use (and remove MPI naming scheme)
  - remove progress_mpi_enable / progress_mpi_disable because 
    we can now use the functions to set the sched_yield and
    tick rate interfaces
  - rename opal_progress_events() to opal_progress_set_event_flag()
    because the first really isn't descriptive of what the function
    does and I always got confused by it

This commit was SVN r12645.
Этот коммит содержится в:
Brian Barrett 2006-11-22 02:06:52 +00:00
родитель 9f3dcd147a
Коммит 33320b7165
11 изменённых файлов: 308 добавлений и 247 удалений

Просмотреть файл

@ -14,7 +14,7 @@ Copyright (c) 2004-2006 High Performance Computing Center Stuttgart,
University of Stuttgart. All rights reserved.
Copyright (c) 2004-2006 The Regents of the University of California.
All rights reserved.
© Copyright 2006 Los Alamos National Security, LLC. All rights
Copyright (c) 2006 Los Alamos National Security, LLC. All rights
reserved.
Copyright (c) 2006 Cisco Systems, Inc. All rights reserved.
Copyright (c) 2006 Voltaire, Inc. All rights reserved.

Просмотреть файл

@ -10,6 +10,9 @@
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006 University of Houston. All rights reserved.
* Copyright (c) 2006 Los Alamos National Security, LLC. All rights
* reserved.
*
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -83,7 +86,7 @@ int ompi_comm_connect_accept ( ompi_communicator_t *comm, int root,
/* tell the progress engine to tick the event library more
often, to make sure that the OOB messages get sent */
opal_progress_event_increment();
opal_progress_event_users_increment();
if ( rank == root ) {
/* The process receiving first does not have yet the contact
@ -243,8 +246,7 @@ int ompi_comm_connect_accept ( ompi_communicator_t *comm, int root,
exit:
/* done with OOB and such - slow our tick rate again */
opal_progress();
opal_progress_event_decrement();
opal_progress_event_users_decrement();
if ( NULL != rprocs ) {
free ( rprocs );
@ -380,7 +382,7 @@ ompi_comm_start_processes(int count, char **array_of_commands,
*/
/* make sure the progress engine properly trips the event library */
opal_progress_event_increment();
opal_progress_event_users_increment();
/* check to see if we want timing information */
param = mca_base_param_reg_int_name("ompi", "timing",
@ -438,7 +440,7 @@ ompi_comm_start_processes(int count, char **array_of_commands,
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
/* rollback what was already done */
for (j=0; j < i; j++) OBJ_RELEASE(apps[j]);
opal_progress_event_decrement();
opal_progress_event_users_decrement();
return ORTE_ERR_OUT_OF_RESOURCE;
}
/* copy over the name of the executable */
@ -447,7 +449,7 @@ ompi_comm_start_processes(int count, char **array_of_commands,
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
/* rollback what was already done */
for (j=0; j < i; j++) OBJ_RELEASE(apps[j]);
opal_progress_event_decrement();
opal_progress_event_users_decrement();
return ORTE_ERR_OUT_OF_RESOURCE;
}
/* record the number of procs to be generated */
@ -474,7 +476,7 @@ ompi_comm_start_processes(int count, char **array_of_commands,
for (j=0; j < i; j++) {
OBJ_RELEASE(apps[j]);
}
opal_progress_event_decrement();
opal_progress_event_users_decrement();
return ORTE_ERR_OUT_OF_RESOURCE;
}
apps[i]->argv[0] = strdup(array_of_commands[i]);
@ -497,7 +499,7 @@ ompi_comm_start_processes(int count, char **array_of_commands,
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
/* rollback what was already done */
for (j=0; j < i; j++) OBJ_RELEASE(apps[j]);
opal_progress_event_decrement();
opal_progress_event_users_decrement();
return ORTE_ERR_OUT_OF_RESOURCE;
}
asprintf(&(apps[i]->env[0]), "OMPI_PARENT_PORT=%s", port_name);
@ -571,7 +573,7 @@ ompi_comm_start_processes(int count, char **array_of_commands,
ORTE_RMGR_ATTR_OVERRIDE))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&attributes);
opal_progress_event_decrement();
opal_progress_event_users_decrement();
return MPI_ERR_SPAWN;
}
@ -583,7 +585,7 @@ ompi_comm_start_processes(int count, char **array_of_commands,
ORTE_RMGR_ATTR_OVERRIDE))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&attributes);
opal_progress_event_decrement();
opal_progress_event_users_decrement();
return MPI_ERR_SPAWN;
}
@ -593,7 +595,7 @@ ompi_comm_start_processes(int count, char **array_of_commands,
ORTE_RMGR_ATTR_OVERRIDE))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&attributes);
opal_progress_event_decrement();
opal_progress_event_users_decrement();
return MPI_ERR_SPAWN;
}
@ -616,7 +618,7 @@ ompi_comm_start_processes(int count, char **array_of_commands,
/* spawn procs */
if (ORTE_SUCCESS != (rc = orte_rmgr.spawn_job(apps, count, &new_jobid, 0, NULL, NULL, ORTE_PROC_STATE_NONE, &attributes))) {
ORTE_ERROR_LOG(rc);
opal_progress_event_decrement();
opal_progress_event_users_decrement();
return MPI_ERR_SPAWN;
}
@ -632,7 +634,7 @@ ompi_comm_start_processes(int count, char **array_of_commands,
}
/* clean up */
opal_progress_event_decrement();
opal_progress_event_users_decrement();
while (NULL != (item = opal_list_remove_first(&attributes))) OBJ_RELEASE(item);
OBJ_DESTRUCT(&attributes);

Просмотреть файл

@ -9,6 +9,9 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006 Los Alamos National Security, LLC. All rights
* reserved.
*
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -506,7 +509,7 @@ static void mca_btl_mvapi_endpoint_connected(mca_btl_mvapi_endpoint_t *endpoint)
/**
* The connection is correctly setup. Now we can decrease the event trigger.
*/
opal_progress_event_decrement();
opal_progress_event_users_decrement();
while(!opal_list_is_empty(&(endpoint->pending_send_frags))) {
frag_item = opal_list_remove_first(&(endpoint->pending_send_frags));
@ -647,7 +650,7 @@ static void mca_btl_mvapi_endpoint_recv(
* let the event engine poll the OOB events. Note: we increment it once per active
* connection.
*/
opal_progress_event_increment();
opal_progress_event_users_increment();
break;
case MCA_BTL_IB_CONNECTING :
@ -749,7 +752,7 @@ int mca_btl_mvapi_endpoint_send(
* let the event engine poll the OOB events. Note: we increment it once per active
* connection.
*/
opal_progress_event_increment();
opal_progress_event_users_increment();
call_progress = 1;
break;

Просмотреть файл

@ -10,6 +10,9 @@
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2006 Los Alamos National Security, LLC. All rights
* reserved.
*
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -542,7 +545,7 @@ static void mca_btl_openib_endpoint_connected(mca_btl_openib_endpoint_t *endpoin
/**
* The connection is correctly setup. Now we can decrease the event trigger.
*/
opal_progress_event_decrement();
opal_progress_event_users_decrement();
/* While there are frags in the list,
* process them */
@ -731,7 +734,7 @@ static void mca_btl_openib_endpoint_recv(
* let the event engine poll the OOB events. Note: we increment it once per active
* connection.
*/
opal_progress_event_increment();
opal_progress_event_users_increment();
break;
case MCA_BTL_IB_CONNECTING :
@ -831,7 +834,7 @@ int mca_btl_openib_endpoint_send(
* let the event engine poll the OOB events. Note: we increment it once per active
* connection.
*/
opal_progress_event_increment();
opal_progress_event_users_increment();
call_progress = true;
break;

Просмотреть файл

@ -9,6 +9,9 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006 Los Alamos National Security, LLC. All rights
* reserved.
*
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -130,7 +133,7 @@ int mca_btl_tcp_add_procs(
/* we increase the count of MPI users of the event library
once per peer, so that we are used until we aren't
connected to a peer */
opal_progress_event_increment();
opal_progress_event_users_increment();
}
return OMPI_SUCCESS;
@ -149,7 +152,7 @@ int mca_btl_tcp_del_procs(struct mca_btl_base_module_t* btl,
opal_list_remove_item(&tcp_btl->tcp_endpoints, (opal_list_item_t*)tcp_endpoint);
OBJ_RELEASE(tcp_endpoint);
}
opal_progress_event_decrement();
opal_progress_event_users_decrement();
}
return OMPI_SUCCESS;
}
@ -490,7 +493,7 @@ int mca_btl_tcp_finalize(struct mca_btl_base_module_t* btl)
item = opal_list_remove_first(&tcp_btl->tcp_endpoints)) {
mca_btl_tcp_endpoint_t *endpoint = (mca_btl_tcp_endpoint_t*)item;
OBJ_RELEASE(endpoint);
opal_progress_event_decrement();
opal_progress_event_users_decrement();
}
free(tcp_btl);
return OMPI_SUCCESS;

Просмотреть файл

@ -10,6 +10,9 @@
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2006 Los Alamos National Security, LLC. All rights
* reserved.
*
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -115,11 +118,12 @@ int ompi_mpi_finalize(void)
ompi_mpi_finalized = true;
#if OMPI_ENABLE_PROGRESS_THREADS == 0
opal_progress_events(OPAL_EVLOOP_ONELOOP);
opal_progress_set_event_flag(OPAL_EVLOOP_ONELOOP);
#endif
/* Change progress function priority back to RTE level stuff */
opal_progress_mpi_disable();
/* Redo ORTE calling opal_progress_event_users_increment() during
MPI lifetime, to get better latency when not using TCP */
opal_progress_event_users_increment();
/* If maffinity was setup, tear it down */
if (ompi_mpi_maffinity_setup) {

Просмотреть файл

@ -10,6 +10,9 @@
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2006 Los Alamos National Security, LLC. All rights
* reserved.
*
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -281,7 +284,6 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
/* Setup process affinity */
if (ompi_mpi_paffinity_alone) {
int param, value;
bool set = false;
param = mca_base_param_find("mpi", NULL, "paffinity_processor");
if (param >= 0) {
@ -328,13 +330,6 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
goto error;
}
/* initialize the progress engine for MPI functionality */
if (OMPI_SUCCESS != opal_progress_mpi_init()) {
error = "opal_progress_mpi_init() failed";
goto error;
}
/* initialize ops. This has to be done *after* ddt_init, but
before mca_coll_base_open, since some collective modules
(e.g. the hierarchical) need them in the query function
@ -678,17 +673,29 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
#if OMPI_ENABLE_PROGRESS_THREADS == 0
/* switch from letting us sit in the event library for a bit each
time through opal_progress() to completely non-blocking */
opal_progress_events(OPAL_EVLOOP_NONBLOCK);
opal_progress_set_event_flag(OPAL_EVLOOP_NONBLOCK);
#endif
/* put the event library in "high performance MPI mode" */
if (OMPI_SUCCESS != (ret = opal_progress_mpi_enable())) {
error = "opal_progress_mpi_enable() failed";
/* This will loop back up above, but ret != OMPI_SUCCESS, so
we'll end up returning out of this function before getting
here (and therefore avoiding an infinite loop) */
goto error;
/* Undo ORTE calling opal_progress_event_users_increment() during
MPI lifetime, to get better latency when not using TCP */
opal_progress_event_users_decrement();
/* override ORTE setting yield_when_idle, if desired */
param = mca_base_param_find("mpi", NULL, "yield_when_idle");
mca_base_param_lookup_int(param, &value);
if (value < 0) {
/* if we got a bogus value, do the conservative thing... */
opal_progress_set_yield_when_idle(true);
} else {
opal_progress_set_yield_when_idle(value == 0 ? false : true);
}
param = mca_base_param_find("mpi", NULL, "event_tick_rate");
mca_base_param_lookup_int(param, &value);
/* negative value means use default - just don't do anything */
if (value >= 0) {
opal_progress_set_event_poll_rate(value);
}
/* If we want the connection warmup, go do it */
if (ompi_mpi_preconnect_all) {

Просмотреть файл

@ -9,6 +9,9 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006 Los Alamos National Security, LLC. All rights
* reserved.
*
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -31,6 +34,8 @@
int opal_register_params(void)
{
/*
* This string is going to be used in opal/util/stacktrace.c
*/
@ -68,5 +73,11 @@ int opal_register_params(void)
false, false, string, NULL);
}
#if OMPI_ENABLE_DEBUG
mca_base_param_reg_int_name("opal", "progress_debug",
"Set to non-zero to debug progress engine features",
false, false, 0, NULL);
#endif
return OPAL_SUCCESS;
}

Просмотреть файл

@ -9,6 +9,9 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006 Los Alamos National Security, LLC. All rights
* reserved.
*
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -34,15 +37,8 @@
* default parameters
*/
static int opal_progress_event_flag = OPAL_EVLOOP_ONELOOP;
#if OPAL_PROGRESS_USE_TIMERS
static const opal_timer_t opal_progress_default_tick_rate = 10000; /* 10ms */
#else
static const int opal_progress_default_tick_rate = 10000; /* 10k calls to opal_progress */
#endif
volatile int32_t opal_progress_thread_count = 0;
int opal_progress_spin_count = 10000;
/*
@ -71,118 +67,33 @@ static int32_t event_progress_delta = 0;
#endif
/* users of the event library from MPI cause the tick rate to
be every time */
static int32_t event_num_mpi_users = 0;
static int32_t num_event_users = 0;
#if OMPI_ENABLE_DEBUG
static int debug_output = -1;
#endif
/* init the progress engine - called from orte_init */
int
opal_progress_init(void)
{
int param, value;
/* reentrant issues */
#if OMPI_HAVE_THREAD_SUPPORT
opal_atomic_init(&progress_lock, OPAL_ATOMIC_UNLOCKED);
#endif /* OMPI_HAVE_THREAD_SUPPORT */
/* always call sched yield when in the rte only... */
call_yield = 1;
#if OPAL_PROGRESS_USE_TIMERS
event_progress_delta = 0;
#if OPAL_TIMER_USEC_NATIVE
event_progress_last_time = opal_timer_base_get_usec();
#else
event_progress_last_time = opal_timer_base_get_cycles();
#endif
#else
event_progress_counter = event_progress_delta = 0;
#endif
return OPAL_SUCCESS;
}
int
opal_progress_mpi_init(void)
{
event_num_mpi_users = 0;
return OPAL_SUCCESS;
}
/* turn on MPI optimizations */
int
opal_progress_mpi_enable(void)
{
int param, value;
/* call sched yield when oversubscribed. */
param = mca_base_param_find("mpi", NULL, "yield_when_idle");
mca_base_param_lookup_int(param, &value);
if (value < 0) {
/* this should never happen set to 1 if it somehow does */
call_yield = 1;
} else {
call_yield = value;
}
/* set the event tick rate */
param = mca_base_param_find("mpi", NULL, "event_tick_rate");
opal_progress_set_event_poll_rate(10000);
#if OMPI_ENABLE_DEBUG
param = mca_base_param_find("opal", NULL, "progress_debug");
mca_base_param_lookup_int(param, &value);
if (value < 0) {
/* user didn't specify - default tick rate */
event_progress_delta = opal_progress_default_tick_rate;
} else if (value == 0) {
#if OPAL_PROGRESS_USE_TIMERS
/* user specified as never tick - tick once per minute */
event_progress_delta = 60 * 1000000;
#else
/* user specified as never tick - don't count often */
event_progress_delta = INT_MAX;
#endif
} else {
#if OPAL_PROGRESS_USE_TIMERS
event_progress_delta = value;
#else
/* subtract one so that we can do post-fix subtraction
in the inner loop and go faster */
event_progress_delta = value - 1;
#endif
if (value) {
debug_output = opal_output_open(NULL);
}
#if OPAL_PROGRESS_USE_TIMERS && !OPAL_TIMER_USEC_NATIVE
/* going to use cycles for counter. Adjust specified usec into cycles */
event_progress_delta = event_progress_delta * opal_timer_base_get_freq() / 1000000;
#endif
#if OPAL_PROGRESS_USE_TIMERS
#if OPAL_TIMER_USEC_NATIVE
event_progress_last_time = opal_timer_base_get_usec();
#else
event_progress_last_time = opal_timer_base_get_cycles();
#endif
#else
/* it's possible that an init function bumped up our tick rate.
* If so, set the event_progress counter to 0. Otherwise, set it to
* the reset value */
event_progress_counter = (event_num_mpi_users > 0) ?
0 : event_progress_delta;
#endif
return OPAL_SUCCESS;
}
int
opal_progress_mpi_disable(void)
{
/* always call sched yield from here on... */
call_yield = 1;
/* always tick the event library */
event_progress_delta = 0;
#if !OPAL_PROGRESS_USE_TIMERS
event_progress_counter = 0;
#endif
return OPAL_SUCCESS;
@ -192,19 +103,17 @@ opal_progress_mpi_disable(void)
int
opal_progress_finalize(void)
{
/* don't need to free the progress lock */
/* free memory associated with the callbacks */
#if OMPI_HAVE_THREAD_SUPPORT
opal_atomic_lock(&progress_lock);
#endif
callbacks_len = 0;
callbacks_size = 0;
if (NULL != callbacks) {
free(callbacks);
callbacks = NULL;
}
callbacks_len = 0;
callbacks_size = 0;
#if OMPI_HAVE_THREAD_SUPPORT
opal_atomic_unlock(&progress_lock);
@ -214,14 +123,6 @@ opal_progress_finalize(void)
}
void
opal_progress_events(int flag)
{
opal_progress_event_flag = flag;
}
/*
* Progress the event library and any functions that have registered to
* be called. We don't propagate errors from the progress functions,
@ -253,7 +154,7 @@ opal_progress(void)
#if OMPI_HAVE_THREAD_SUPPORT
if (opal_atomic_trylock(&progress_lock)) {
#endif /* OMPI_HAVE_THREAD_SUPPORT */
event_progress_last_time = (event_num_mpi_users > 0) ?
event_progress_last_time = (num_event_users > 0) ?
now - event_progress_delta : now;
events += opal_event_loop(opal_progress_event_flag);
@ -271,7 +172,7 @@ opal_progress(void)
if (opal_atomic_trylock(&progress_lock)) {
#endif /* OMPI_HAVE_THREAD_SUPPORT */
event_progress_counter =
(event_num_mpi_users > 0) ? 0 : event_progress_delta;
(num_event_users > 0) ? 0 : event_progress_delta;
events += opal_event_loop(opal_progress_event_flag);
#if OMPI_HAVE_THREAD_SUPPORT
opal_atomic_unlock(&progress_lock);
@ -305,6 +206,103 @@ opal_progress(void)
}
/**
 * Replace the flag that opal_progress() hands to opal_event_loop().
 *
 * @param flag  New flag value to remember for later event loop calls.
 * @return      The flag value that was in effect before this call.
 */
int
opal_progress_set_event_flag(int flag)
{
    const int previous = opal_progress_event_flag;

    opal_progress_event_flag = flag;

    return previous;
}
/**
 * Register one more user of the event library.
 *
 * Atomically adds one to num_event_users and then nudges the tick
 * bookkeeping: timer builds push the "last time" stamp back by one
 * full delta, counter builds reset the countdown to zero.
 */
void
opal_progress_event_users_increment(void)
{
    int32_t val;

    val = opal_atomic_add_32(&num_event_users, 1);
    OPAL_OUTPUT((debug_output, "event_users_increment setting count to %d", val));
    /* val is only consumed by the debug output above.  The cast keeps
       builds in which OPAL_OUTPUT expands to nothing free of a
       set-but-unused warning (presumably the non-debug builds, since
       debug_output is only declared under OMPI_ENABLE_DEBUG). */
    (void) val;

#if OPAL_PROGRESS_USE_TIMERS
    /* force an update next round (we'll be past the delta) */
    event_progress_last_time -= event_progress_delta;
#else
    /* always reset the tick rate - can't hurt */
    event_progress_counter = 0;
#endif
}
/**
 * Remove one user of the event library.
 *
 * Atomically subtracts one from num_event_users.  In counter builds
 * (no timers) the countdown is restarted at event_progress_delta when
 * the resulting count is not negative.
 */
void
opal_progress_event_users_decrement(void)
{
    int32_t val;

    val = opal_atomic_sub_32(&num_event_users, 1);
    OPAL_OUTPUT((debug_output, "event_users_decrement setting count to %d", val));
    /* In timer builds val is read only by the debug output above; the
       cast avoids a set-but-unused warning when OPAL_OUTPUT expands to
       nothing (presumably the non-debug builds). */
    (void) val;

#if !OPAL_PROGRESS_USE_TIMERS
    /* start now in delaying if it's easy */
    /* NOTE(review): this guard is true both when the last user leaves
       (val == 0) and while users remain (val > 0); confirm that
       restarting the countdown in the second case is intended. */
    if (val >= 0) {
        event_progress_counter = event_progress_delta;
    }
#endif
}
/**
 * Turn yielding of the processor by an idle progress engine on or off.
 *
 * @param yieldopt  true to store 1 in call_yield, false to store 0.
 * @return          Whether call_yield was non-zero before this call.
 */
bool
opal_progress_set_yield_when_idle(bool yieldopt)
{
    const bool was_yielding = (0 != call_yield);

    if (yieldopt) {
        call_yield = 1;
    } else {
        call_yield = 0;
    }
    OPAL_OUTPUT((debug_output, "progress_set_yield_when_idle to %d", call_yield));

    return was_yielding;
}
/**
 * Set how often the event library is entered when it has no users.
 *
 * Timer builds store the interval in event_progress_delta (converted
 * from microseconds to cycles when the timer is not usec-native) and
 * restart the "last time" stamp.  Counter builds store polltime - 1 as
 * the countdown reload value and zero the running countdown.
 *
 * @param polltime  Requested rate.  Zero means "never tick", which is
 *                  stored as one minute (timer builds) or INT_MAX
 *                  calls (counter builds).
 *
 * NOTE(review): negative values are not filtered here; the one caller
 * visible in this change only passes values >= 0 - confirm for others.
 */
void
opal_progress_set_event_poll_rate(int polltime)
{
    OPAL_OUTPUT((debug_output, "progress_set_event_poll_rate(%d)", polltime));

#if OPAL_PROGRESS_USE_TIMERS
    /* restart the measurement from "now" */
    event_progress_delta = 0;
#  if OPAL_TIMER_USEC_NATIVE
    event_progress_last_time = opal_timer_base_get_usec();
#  else
    event_progress_last_time = opal_timer_base_get_cycles();
#  endif

    /* user specified as never tick - tick once per minute */
    event_progress_delta = (0 == polltime) ? 60 * 1000000 : polltime;

#  if !OPAL_TIMER_USEC_NATIVE
    /* going to use cycles for counter.  Adjust specified usec into cycles */
    event_progress_delta = event_progress_delta * opal_timer_base_get_freq() / 1000000;
#  endif
#else
    event_progress_counter = event_progress_delta = 0;

    /* user specified as never tick - don't count often.  Otherwise
       subtract one so that we can do post-fix subtraction in the
       inner loop and go faster */
    event_progress_delta = (0 == polltime) ? INT_MAX : polltime - 1;
#endif
}
int
opal_progress_register(opal_progress_callback_t cb)
{
@ -378,38 +376,3 @@ opal_progress_unregister(opal_progress_callback_t cb)
return ret;
}
/**
 * Increase count of MPI users of the event library.
 *
 * Atomically adds one to event_num_mpi_users and then nudges the tick
 * bookkeeping: timer builds push the "last time" stamp back by one
 * full delta, counter builds reset the countdown to zero.
 *
 * @return OPAL_SUCCESS in all cases.
 */
int
opal_progress_event_increment(void)
{
    /* the updated count is not needed here, so it is discarded */
    (void) opal_atomic_add_32(&event_num_mpi_users, 1);

#if OPAL_PROGRESS_USE_TIMERS
    /* force an update next round (we'll be past the delta) */
    event_progress_last_time -= event_progress_delta;
#else
    /* always reset the tick rate - can't hurt */
    event_progress_counter = 0;
#endif

    return OPAL_SUCCESS;
}
/**
 * Decrease count of MPI users of the event library.
 *
 * Atomically subtracts one from event_num_mpi_users.  In counter
 * builds (no timers) the countdown is restarted at
 * event_progress_delta when the resulting count is not negative.
 *
 * @return OPAL_SUCCESS in all cases.
 */
int
opal_progress_event_decrement(void)
{
    int32_t val;

    val = opal_atomic_sub_32(&event_num_mpi_users, 1);

#if !OPAL_PROGRESS_USE_TIMERS
    /* start now in delaying if it's easy */
    if (val >= 0) {
        event_progress_counter = event_progress_delta;
    }
#else
    /* timer builds have no countdown to restart, so val goes unused */
    (void) val;
#endif

    return OPAL_SUCCESS;
}

Просмотреть файл

@ -9,6 +9,9 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006 Los Alamos National Security, LLC. All rights
* reserved.
*
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -22,11 +25,13 @@
* Progress engine for Open MPI
*/
#ifndef _OMPI_PROGRESS_H_
#define _OMPI_PROGRESS_H_
#ifndef OPAL_RUNTIME_OPAL_PROGRESS_H
#define OPAL_RUNTIME_OPAL_PROGRESS_H
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
#include "opal/threads/mutex.h"
/**
@ -39,35 +44,6 @@ extern "C" {
*/
OPAL_DECLSPEC int opal_progress_init(void);
/**
* Configure the progress engine for executing MPI applications
*
* Register to receive any needed information from the GPR and
* intialize any data structures required for MPI applications.
*
* \note opal_progress_init() must be called before calling
* this function. Failure to do so is an error.
*/
OPAL_DECLSPEC int opal_progress_mpi_init(void);
/**
* Turn on optimizations for MPI progress
*
* Turn on optimizations for MPI applications. This includes lowering
* the rate at which the event library is ticked if it is not under
* active use and possibly disabling the sched_yield call when the
* progress engine is idle
*/
OPAL_DECLSPEC int opal_progress_mpi_enable(void);
/**
* Turn off all optimizations enabled by opal_progress_mpi_enable().
*
* Completely reverses all optimizations enabled by
* opal_progress_mpi_enable(). The event library resumes constant
* ticking and the progress engine yields the CPU when idle.
*/
OPAL_DECLSPEC int opal_progress_mpi_disable(void);
/**
* Shut down the progress engine
@ -78,45 +54,126 @@ OPAL_DECLSPEC int opal_progress_mpi_disable(void);
*/
OPAL_DECLSPEC int opal_progress_finalize(void);
/**
* Control how the event library is called
*/
OPAL_DECLSPEC void opal_progress_events(int);
/**
* Progress all pending events
*
* Progress all pending events. All registered event handlers will be
* called on every call into opal_progress(). The event library will be
* called if the number of event library users is greater than 0
* (adjustments can be made by calling
* opal_progress_event_users_increment() and
* opal_progress_event_users_decrement()) or the time since the last
* call into the event library is greater than the progress tick rate
* (by default, 10ms).
*/
OPAL_DECLSPEC void opal_progress(void);
/**
* Control how the event library is called
*
* Adjust the flags argument used to call opal_event_loop() from
* opal_progress(). The default argument is OPAL_EVLOOP_ONELOOP,
* meaning that the call to opal_event_loop() will block pending
* events, but may block for a period of time.
*
* @param flags One of the valid flags arguments to
* opal_event_loop().
* @return Previous value of flags used to call
* opal_event_loop().
*/
OPAL_DECLSPEC int opal_progress_set_event_flag(int flags);
/**
* Increase the number of users of the event library
*
* Increase the number of users of the event library. This count is
* used by opal_progress to determine if opal_event_loop() should be
* called every call to opal_progress() or only after a time has
* elapsed since the last call (by default, 10ms). The count defaults
* to 0, meaning that opal_progress_event_users_increment() must be
* called at least once for the event loop to be called on every entry
* to opal_progress().
*
*/
OPAL_DECLSPEC void opal_progress_event_users_increment(void);
/**
* Decrease the number of users of the event library
*
* Decrease the number of users of the event library. This count is
* used by opal_progress to determine if opal_event_loop() should be
* called every call to opal_progress() or only after a time has
* elapsed since the last call (by default, 10ms).
*/
OPAL_DECLSPEC void opal_progress_event_users_decrement(void);
/**
* Set whether opal_progress() should yield when idle
*
* Set whether opal_progress() should yield the processor (either by
* sched_yield() or SwitchToThread()) if no events were progressed
* during the progress loop. The return value of the callback
* functions is used to determine whether or not yielding is required.
* By default, the event loop will yield when the progress function is
* idle.
*
* @param yieldopt Whether to yield when idle.
* @return Previous value of the yield_when_idle option.
*/
OPAL_DECLSPEC bool opal_progress_set_yield_when_idle(bool yieldopt);
/**
* Set time between calls into the event library
*
* Set time between calls into the event library when there are no
* users of the event library (set by
* opal_progress_event_users_increment() and
* opal_progress_event_users_decrement()).
*
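* @param microseconds Time (in microseconds) between calls to the event
* library; when the progress engine is built without
* timer support, the value is instead used as a count
* of opal_progress() calls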
*/
OPAL_DECLSPEC void opal_progress_set_event_poll_rate(int microseconds);
/**
* Progress callback function typedef
*
* Prototype for a progress function callback. Progress function
* callbacks can be registered with opal_progress_register() and
* deregistered with opal_progress_unregister(). It should be noted
* that either registering or deregistering a function callback is an
* extraordinarily expensive operation and should not be used for
* potentially short callback lifetimes.
*
* @return Number of events progressed during the callback
*/
typedef int (*opal_progress_callback_t)(void);
/**
* Register an event to be progressed
*
* Register an event to be progressed during calls to opal_progress().
* Please read the note in opal_progress_callback_t.
*/
OPAL_DECLSPEC int opal_progress_register(opal_progress_callback_t cb);
/**
* Unregister previously registered event
* Deregister previously registered event
*
* Deregister an event to be progressed during calls to opal_progress().
* Please read the note in opal_progress_callback_t.
*/
OPAL_DECLSPEC int opal_progress_unregister(opal_progress_callback_t cb);
/**
* Increase count of MPI users of the event library
*/
OPAL_DECLSPEC int opal_progress_event_increment(void);
/**
* Decrease count of MPI users of the event library
*/
OPAL_DECLSPEC int opal_progress_event_decrement(void);
/**
* Progress until flag is true or poll iterations completed
*/
OPAL_DECLSPEC extern volatile int32_t opal_progress_thread_count;
OPAL_DECLSPEC extern int opal_progress_spin_count;
@ -126,6 +183,9 @@ static inline bool opal_progress_threads(void)
}
/**
* Progress until flag is true or poll iterations completed
*/
static inline bool opal_progress_spin(volatile bool* complete)
{
int32_t c;

Просмотреть файл

@ -9,6 +9,9 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006 Los Alamos National Security, LLC. All rights
* reserved.
*
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -159,6 +162,8 @@ int orte_init_stage1(bool infrastructure)
error = "opal_progress_init";
goto error;
}
/* we want to tick the event library whenever possible */
opal_progress_event_users_increment();
/*
* Internal startup