2004-08-15 07:33:13 +04:00
|
|
|
/* -*- C -*-
 *
 * Copyright (c) 2004-2005 The Trustees of Indiana University.
 * All rights reserved.
 * Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
 * All rights reserved.
 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
 * University of Stuttgart. All rights reserved.
 * Copyright (c) 2004-2005 The Regents of the University of California.
 * All rights reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 *
 */
|
2004-11-20 22:12:43 +03:00
|
|
|
/** @file
|
|
|
|
*/
|
|
|
|
|
2005-03-14 23:57:21 +03:00
|
|
|
#ifndef ORTE_GPR_REPLICA_H
|
|
|
|
#define ORTE_GPR_REPLICA_H
|
2004-08-15 07:33:13 +04:00
|
|
|
|
|
|
|
|
2005-03-14 23:57:21 +03:00
|
|
|
#include "orte_config.h"
|
2004-08-27 09:23:04 +04:00
|
|
|
|
|
|
|
#include <time.h>
|
|
|
|
|
2005-05-24 17:39:15 +04:00
|
|
|
#include "class/orte_bitmap.h"
|
2005-03-14 23:57:21 +03:00
|
|
|
#include "class/orte_pointer_array.h"
|
|
|
|
#include "class/orte_value_array.h"
|
|
|
|
|
2005-07-04 02:45:48 +04:00
|
|
|
#include "opal/threads/mutex.h"
|
|
|
|
#include "opal/threads/condition.h"
|
2004-11-20 22:12:43 +03:00
|
|
|
|
2005-03-14 23:57:21 +03:00
|
|
|
#include "mca/ns/ns_types.h"
|
|
|
|
|
2004-08-15 07:33:13 +04:00
|
|
|
#include "mca/gpr/base/base.h"
|
|
|
|
|
2005-05-12 00:21:10 +04:00
|
|
|
#if defined(c_plusplus) || defined(__cplusplus)
|
|
|
|
extern "C" {
|
|
|
|
#endif
|
|
|
|
|
2004-08-15 07:33:13 +04:00
|
|
|
/*
|
|
|
|
* typedefs needed in replica component
|
|
|
|
*/
|
2004-08-27 09:23:04 +04:00
|
|
|
|
2005-05-01 04:53:00 +04:00
|
|
|
/**
 * Integer tag ("itag") type used inside the replica.  The dictionary
 * structures declared in this header pair a character string with one of
 * these values so that registry searches can work on integers.
 */
typedef size_t orte_gpr_replica_itag_t;

/** Largest value representable by orte_gpr_replica_itag_t. */
#define ORTE_GPR_REPLICA_ITAG_MAX SIZE_MAX
|
2004-08-27 19:36:53 +04:00
|
|
|
|
2004-09-30 01:54:57 +04:00
|
|
|
|
2005-03-14 23:57:21 +03:00
|
|
|
/**
 * Internal addressing-mode bit flags used by the replica.
 *
 * Each flag occupies its own bit, so flags can be OR'ed together in a
 * single orte_gpr_replica_addr_mode_t value.
 * NOTE(review): the names suggest how a set of itags is matched during a
 * search (AND / OR / exclusive variants, optionally negated) - confirm
 * against the search implementation.
 */
typedef uint8_t orte_gpr_replica_addr_mode_t;

/* Replacement lists are fully parenthesized so the flags stay intact
 * whatever operators surround them at the point of use. */
#define ORTE_GPR_REPLICA_AND    ((orte_gpr_replica_addr_mode_t) 0x01)
#define ORTE_GPR_REPLICA_OR     ((orte_gpr_replica_addr_mode_t) 0x02)
#define ORTE_GPR_REPLICA_XAND   ((orte_gpr_replica_addr_mode_t) 0x04)
#define ORTE_GPR_REPLICA_XOR    ((orte_gpr_replica_addr_mode_t) 0x08)
#define ORTE_GPR_REPLICA_NOT    ((orte_gpr_replica_addr_mode_t) 0x40)
|
2004-08-15 07:33:13 +04:00
|
|
|
|
2004-11-20 22:12:43 +03:00
|
|
|
|
|
|
|
/**
 * Action flags for trigger evaluation.
 *
 * The flags are single bits that may be OR'ed together and stored in an
 * orte_gpr_replica_action_t.  The constants are cast to that same
 * (unsigned) type - they were previously cast to int8_t, which disagreed
 * with the uint8_t typedef that holds them.
 */
typedef uint8_t orte_gpr_replica_action_t;

#define ORTE_GPR_REPLICA_NO_ACTION         ((orte_gpr_replica_action_t) 0x00)
#define ORTE_GPR_REPLICA_ENTRY_ADDED       ((orte_gpr_replica_action_t) 0x01)
#define ORTE_GPR_REPLICA_ENTRY_DELETED     ((orte_gpr_replica_action_t) 0x02)
#define ORTE_GPR_REPLICA_ENTRY_CHANGED     ((orte_gpr_replica_action_t) 0x04)
#define ORTE_GPR_REPLICA_ENTRY_CHG_TO      ((orte_gpr_replica_action_t) 0x08)
#define ORTE_GPR_REPLICA_ENTRY_CHG_FRM     ((orte_gpr_replica_action_t) 0x10)
|
2005-03-14 23:57:21 +03:00
|
|
|
|
2005-06-24 20:59:37 +04:00
|
|
|
/*
|
|
|
|
* Local subscription tracker for use by processes
|
|
|
|
* that are operating on the same node as the replica
|
|
|
|
*/
|
|
|
|
typedef struct {
|
2005-07-03 20:06:07 +04:00
|
|
|
opal_object_t super; /**< Allows this to be an object */
|
2005-06-24 20:59:37 +04:00
|
|
|
orte_gpr_subscription_id_t id; /**< id of this subscription */
|
2005-08-01 20:38:15 +04:00
|
|
|
char *name;
|
2005-06-24 20:59:37 +04:00
|
|
|
orte_gpr_notify_cb_fn_t callback; /**< Function to be called for notificaiton */
|
|
|
|
void *user_tag; /**< User-provided tag for callback function */
|
|
|
|
} orte_gpr_replica_local_subscriber_t;
|
|
|
|
|
|
|
|
OBJ_CLASS_DECLARATION(orte_gpr_replica_local_subscriber_t);
|
|
|
|
|
|
|
|
|
2005-07-18 22:49:00 +04:00
|
|
|
/*
|
|
|
|
* Local trigger tracker for use by processes
|
|
|
|
* that are operating on the same node as the replica
|
|
|
|
*/
|
|
|
|
typedef struct {
|
|
|
|
opal_object_t super; /**< Allows this to be an object */
|
|
|
|
orte_gpr_trigger_id_t id; /**< id of this trigger */
|
2005-08-01 20:38:15 +04:00
|
|
|
char *name;
|
2005-07-18 22:49:00 +04:00
|
|
|
orte_gpr_trigger_cb_fn_t callback; /**< Function to be called for notification */
|
|
|
|
void *user_tag; /**< User-provided tag for callback function */
|
|
|
|
} orte_gpr_replica_local_trigger_t;
|
|
|
|
|
|
|
|
OBJ_CLASS_DECLARATION(orte_gpr_replica_local_trigger_t);
|
|
|
|
|
|
|
|
|
2005-03-14 23:57:21 +03:00
|
|
|
typedef struct {
|
|
|
|
int debug;
|
|
|
|
int isolate;
|
2005-07-04 02:45:48 +04:00
|
|
|
opal_mutex_t mutex;
|
2005-06-24 20:59:37 +04:00
|
|
|
size_t num_local_subs;
|
|
|
|
orte_pointer_array_t *local_subscriptions;
|
2005-07-18 22:49:00 +04:00
|
|
|
size_t num_local_trigs;
|
|
|
|
orte_pointer_array_t *local_triggers;
|
2005-06-24 20:59:37 +04:00
|
|
|
size_t num_srch_cptr;
|
2005-03-14 23:57:21 +03:00
|
|
|
orte_pointer_array_t *srch_cptr;
|
2005-07-17 03:08:15 +04:00
|
|
|
size_t num_overwritten;
|
|
|
|
orte_pointer_array_t *overwritten;
|
2005-06-24 20:59:37 +04:00
|
|
|
orte_pointer_array_t *sub_ptrs;
|
|
|
|
size_t num_srch_ival;
|
2005-03-14 23:57:21 +03:00
|
|
|
orte_pointer_array_t *srch_ival;
|
2005-06-10 00:37:25 +04:00
|
|
|
size_t num_acted_upon;
|
2005-06-24 20:59:37 +04:00
|
|
|
orte_pointer_array_t *acted_upon;
|
2005-05-24 17:39:15 +04:00
|
|
|
orte_bitmap_t srch_itag;
|
2005-03-14 23:57:21 +03:00
|
|
|
} orte_gpr_replica_globals_t;
|
|
|
|
|
|
|
|
|
|
|
|
/** Dictionary of string-itag pairs.
|
|
|
|
* This structure is used to create a linked list of string-itag pairs. All calls to
|
|
|
|
* registry functions pass character strings for programming clarity - the replica_dict
|
|
|
|
* structure is used to translate those strings into an integer itag value, thus allowing
|
|
|
|
* for faster searches of the registry.
|
2004-08-15 07:33:13 +04:00
|
|
|
*/
|
2005-03-14 23:57:21 +03:00
|
|
|
struct orte_gpr_replica_dict_t {
|
|
|
|
char *entry; /**< Char string that defines the itag */
|
|
|
|
orte_gpr_replica_itag_t itag; /**< Numerical value assigned by registry to represent string */
|
2004-08-15 07:33:13 +04:00
|
|
|
};
|
2005-03-14 23:57:21 +03:00
|
|
|
typedef struct orte_gpr_replica_dict_t orte_gpr_replica_dict_t;
|
2004-08-15 07:33:13 +04:00
|
|
|
|
2005-03-14 23:57:21 +03:00
|
|
|
/*
|
|
|
|
* Registry "head"
|
|
|
|
* The registry "head" contains:
|
|
|
|
*
|
|
|
|
* (2) the next available itag for the segment dictionary.
|
|
|
|
*
|
|
|
|
* (3) a managed array of pointers to segment objects.
|
2004-08-15 07:33:13 +04:00
|
|
|
*
|
2005-03-14 23:57:21 +03:00
|
|
|
* (4) a managed array of pointers to triggers acting on the entire registry
|
|
|
|
*
|
2004-08-15 07:33:13 +04:00
|
|
|
*/
|
2005-03-14 23:57:21 +03:00
|
|
|
struct orte_gpr_replica_t {
|
|
|
|
orte_pointer_array_t *segments; /**< Managed array of pointers to segment objects */
|
2005-06-10 00:37:25 +04:00
|
|
|
size_t num_segs;
|
2005-03-14 23:57:21 +03:00
|
|
|
orte_pointer_array_t *triggers; /**< Managed array of pointers to triggers */
|
2005-06-10 00:37:25 +04:00
|
|
|
size_t num_trigs;
|
2005-06-24 20:59:37 +04:00
|
|
|
orte_pointer_array_t *subscriptions; /**< Managed array of pointers to subscriptions */
|
|
|
|
size_t num_subs;
|
2005-06-13 21:00:57 +04:00
|
|
|
bool processing_callbacks;
|
2005-07-03 20:22:16 +04:00
|
|
|
opal_list_t callbacks; /**< List of callbacks to be processed */
|
2004-08-15 07:33:13 +04:00
|
|
|
};
|
2005-03-14 23:57:21 +03:00
|
|
|
typedef struct orte_gpr_replica_t orte_gpr_replica_t;
|
2004-08-15 07:33:13 +04:00
|
|
|
|
|
|
|
|
|
|
|
/** Registry segment definition.
|
|
|
|
* The registry is subdivided into segments, each defining a unique domain. The "universe" segment
|
|
|
|
* is automatically created to allow the exchange of information supporting universe-level functions.
|
|
|
|
* Similarly, a segment is automatically created for each MPI CommWorld within the universe - the
|
|
|
|
* name for that segment is stored in each CommWorld's ompi_system_info structure so program
|
|
|
|
* elements within that CommWorld can access it. The segment structure serves as the "head" of a linked
|
2005-03-14 23:57:21 +03:00
|
|
|
* list of registry elements for that segment. Each segment also holds its own token-itag dictionary
|
2004-08-15 07:33:13 +04:00
|
|
|
* to avoid naming conflicts between tokens from CommWorlds sharing a given universe.
|
|
|
|
*/
|
2005-03-14 23:57:21 +03:00
|
|
|
struct orte_gpr_replica_segment_t {
|
2005-07-03 20:06:07 +04:00
|
|
|
opal_object_t super; /**< Make this an object */
|
2005-03-14 23:57:21 +03:00
|
|
|
char *name; /**< Name of the segment */
|
|
|
|
orte_gpr_replica_itag_t itag; /**< itag of this segment */
|
2005-06-24 20:59:37 +04:00
|
|
|
size_t num_dict_entries;
|
2005-03-14 23:57:21 +03:00
|
|
|
orte_pointer_array_t *dict; /**< Managed array of dict structs */
|
2005-06-24 20:59:37 +04:00
|
|
|
size_t num_containers;
|
2005-03-14 23:57:21 +03:00
|
|
|
orte_pointer_array_t *containers; /**< Managed array of pointers to containers on this segment */
|
2004-08-15 07:33:13 +04:00
|
|
|
};
|
2005-03-14 23:57:21 +03:00
|
|
|
typedef struct orte_gpr_replica_segment_t orte_gpr_replica_segment_t;
|
2004-11-17 01:53:33 +03:00
|
|
|
|
2005-03-14 23:57:21 +03:00
|
|
|
OBJ_CLASS_DECLARATION(orte_gpr_replica_segment_t);
|
2004-11-17 01:53:33 +03:00
|
|
|
|
2004-11-20 22:12:43 +03:00
|
|
|
|
2005-03-14 23:57:21 +03:00
|
|
|
/** The core registry structure.
|
|
|
|
* Each segment of the registry contains an array of registry containers, each composed
|
|
|
|
* of:
|
|
|
|
*
|
|
|
|
* (1) An object structure that allows the structure to be treated with the OBJ
|
|
|
|
* memory management system
|
|
|
|
*
|
|
|
|
* (2) An array of itags that define the container - these are 1:1 correspondents with
|
|
|
|
* the character string tokens provided by caller
|
|
|
|
*
|
|
|
|
* (3) An array of indices into the trigger notifier array - each index points to
|
|
|
|
* a notifier whose trigger refers to this container.
|
|
|
|
*
|
|
|
|
* (4) An array of pointers to keyval objects that actually hold the data.
|
|
|
|
*
|
|
|
|
* At this time, no security is provided on an object-level basis. Thus, all requests for an
|
|
|
|
* object are automatically granted. This may be changed at some future time by adding an
|
|
|
|
* "authorization" linked list of ID's and their access rights to this structure.
|
2004-11-20 22:12:43 +03:00
|
|
|
*/
|
2005-03-14 23:57:21 +03:00
|
|
|
struct orte_gpr_replica_container_t {
|
2005-07-03 20:06:07 +04:00
|
|
|
opal_object_t super; /**< Make this an object */
|
2005-05-01 04:53:00 +04:00
|
|
|
size_t index; /**< Location in the pointer array */
|
2005-03-14 23:57:21 +03:00
|
|
|
orte_gpr_replica_itag_t *itags; /**< Array of itags that define this container */
|
2005-05-01 04:53:00 +04:00
|
|
|
size_t num_itags; /**< Number of itags in array */
|
2005-03-14 23:57:21 +03:00
|
|
|
orte_pointer_array_t *itagvals; /**< Array of itagval pointers */
|
2005-06-24 20:59:37 +04:00
|
|
|
size_t num_itagvals; /**< Number of itagvals in container */
|
2005-03-14 23:57:21 +03:00
|
|
|
orte_value_array_t itaglist; /**< Array of itags from all itagvals - used for rapid search */
|
|
|
|
};
|
|
|
|
typedef struct orte_gpr_replica_container_t orte_gpr_replica_container_t;
|
2004-11-20 22:12:43 +03:00
|
|
|
|
2005-03-14 23:57:21 +03:00
|
|
|
OBJ_CLASS_DECLARATION(orte_gpr_replica_container_t);
|
2004-11-20 22:12:43 +03:00
|
|
|
|
|
|
|
|
2005-03-14 23:57:21 +03:00
|
|
|
/* The itag-value pair for storing data entries in the registry
|
2004-11-20 22:12:43 +03:00
|
|
|
*/
|
2005-03-14 23:57:21 +03:00
|
|
|
typedef struct {
|
2005-07-03 20:06:07 +04:00
|
|
|
opal_object_t super; /**< required for this to be an object */
|
2005-05-01 04:53:00 +04:00
|
|
|
size_t index; /**< index of this itagval on the container array */
|
2005-03-14 23:57:21 +03:00
|
|
|
orte_gpr_replica_itag_t itag; /**< itag for this value's key */
|
|
|
|
orte_data_type_t type; /**< the type of value stored */
|
|
|
|
orte_gpr_value_union_t value; /**< Actual stored value */
|
|
|
|
} orte_gpr_replica_itagval_t;
|
|
|
|
|
2005-06-14 01:41:25 +04:00
|
|
|
OBJ_CLASS_DECLARATION(orte_gpr_replica_itagval_t);
|
2005-03-14 23:57:21 +03:00
|
|
|
|
2005-06-24 20:59:37 +04:00
|
|
|
/* The equivalent of the value structure, only using internal
|
|
|
|
* itags for the tokens/keys and pointers to internal structures
|
|
|
|
*/
|
|
|
|
typedef struct {
|
2005-07-03 20:06:07 +04:00
|
|
|
opal_object_t super; /**< Makes this an object */
|
2005-06-24 20:59:37 +04:00
|
|
|
size_t index;
|
|
|
|
/* the segment upon which this data is located */
|
|
|
|
orte_gpr_replica_segment_t *seg;
|
|
|
|
/* describe the data */
|
|
|
|
orte_gpr_addr_mode_t addr_mode; /**< Tokens/keys addressing mode */
|
|
|
|
orte_value_array_t tokentags; /**< Array of tokens defining which containers are affected */
|
|
|
|
orte_value_array_t keytags; /**< Array of keys defining which key-value pairs are affected */
|
|
|
|
} orte_gpr_replica_ivalue_t;
|
|
|
|
|
|
|
|
OBJ_CLASS_DECLARATION(orte_gpr_replica_ivalue_t);
|
|
|
|
|
|
|
|
|
2005-03-14 23:57:21 +03:00
|
|
|
typedef struct {
|
2005-07-03 20:06:07 +04:00
|
|
|
opal_object_t super;
|
2005-03-14 23:57:21 +03:00
|
|
|
orte_gpr_replica_segment_t *seg;
|
|
|
|
orte_gpr_replica_container_t *cptr;
|
|
|
|
orte_gpr_replica_itagval_t *iptr;
|
2005-03-31 18:24:36 +04:00
|
|
|
orte_gpr_replica_itagval_t trigger_level;
|
2005-03-14 23:57:21 +03:00
|
|
|
} orte_gpr_replica_counter_t;
|
|
|
|
|
|
|
|
OBJ_CLASS_DECLARATION(orte_gpr_replica_counter_t);
|
|
|
|
|
2005-06-24 20:59:37 +04:00
|
|
|
typedef struct {
|
2005-07-03 20:06:07 +04:00
|
|
|
opal_object_t super;
|
2005-06-24 20:59:37 +04:00
|
|
|
/* index of this entry in requestor array */
|
|
|
|
size_t index;
|
|
|
|
/* process name of the recipient - set to NULL if local */
|
|
|
|
orte_process_name_t *requestor;
|
|
|
|
/* idtag associated with this subscription */
|
|
|
|
orte_gpr_subscription_id_t idtag;
|
|
|
|
/* for a local subscription, where this block of data goes */
|
|
|
|
orte_gpr_notify_cb_fn_t callback; /**< Function to be called for notification */
|
|
|
|
void *user_tag; /**< User-provided tag for callback function */
|
|
|
|
} orte_gpr_replica_requestor_t;
|
|
|
|
|
|
|
|
OBJ_CLASS_DECLARATION(orte_gpr_replica_requestor_t);
|
|
|
|
|
2005-03-14 23:57:21 +03:00
|
|
|
typedef struct {
|
2005-07-03 20:06:07 +04:00
|
|
|
opal_object_t super; /**< Makes this an object */
|
2005-06-24 20:59:37 +04:00
|
|
|
/* index of this entry in subscription array - corresponds to local idtag */
|
|
|
|
size_t index;
|
|
|
|
/* name of this subscription, if provided */
|
|
|
|
char *name;
|
|
|
|
/* boolean indicating if this subscription is active or not */
|
|
|
|
bool active;
|
Add a job_info segment to the system that holds a container for each job. Within each container is a keyval indicating the job state (i.e., all procs at stage1, finalized, etc.). This provides a rough state-of-health for the job.
This required a little fiddling with a number of areas. Biggest problem was that it uncovered a potential for an infinite loop to be created in the registry. If a callback function modified the registry, the registry checked the triggers to see if anything had fired. Well, if the original callback was due to a trigger firing, that condition hadn't changed - so the trigger fired again....which caused the callback to be called, which modified the registry, which checked the triggers, etc. etc.
Triggers are now checked and then "flagged" as being "in process" so that the registry will NOT recheck that trigger until all callbacks have been processed. Tried doing this with subscriptions as well, but that caused a problem - when we release processes from a stagegate, they (at the moment) immediately place data on the registry that should cause a subscription to fire. Unfortunately, the system will just hang if that subscription doesn't get processed. So, I have left the subscription system alone - any callback function that modifies the registry in a fashion that will fire a subscription will indeed fire that subscription. We'll have to see if this causes problems - it shouldn't, but a careless user could lock things up if the callback generates a callback to itself.
Also fixed the code that placed a process' RML contact info on the registry to eliminate the leading '/' from the string.
This commit was SVN r6684.
2005-07-29 18:11:19 +04:00
|
|
|
/* boolean indicating that this subscription is already being
|
|
|
|
* processed - required to prevent infinite loops should a
|
|
|
|
* callback function modify the registry
|
|
|
|
*/
|
|
|
|
bool processing;
|
2005-06-24 20:59:37 +04:00
|
|
|
/* boolean indicating that this subscription
|
|
|
|
* should be removed after processing
|
|
|
|
* is completed
|
|
|
|
*/
|
|
|
|
bool cleanup;
|
|
|
|
/* action flags describing when the subscription should
|
|
|
|
* generate a notification message. This can be NULL if
|
|
|
|
* the subscription only operates in conjunction
|
|
|
|
* with a trigger
|
2005-03-14 23:57:21 +03:00
|
|
|
*/
|
2005-06-24 20:59:37 +04:00
|
|
|
orte_gpr_notify_action_t action;
|
|
|
|
/* Array of ivalues that describe the data to be
|
|
|
|
* returned when this subscription is "fired"
|
|
|
|
*/
|
|
|
|
size_t num_values;
|
|
|
|
orte_pointer_array_t *values;
|
|
|
|
/*
|
|
|
|
* Array of requestors that are "attached" to this subscription
|
|
|
|
*/
|
|
|
|
size_t num_requestors;
|
|
|
|
orte_pointer_array_t *requestors;
|
|
|
|
} orte_gpr_replica_subscription_t;
|
|
|
|
|
|
|
|
OBJ_CLASS_DECLARATION(orte_gpr_replica_subscription_t);
|
2005-03-14 23:57:21 +03:00
|
|
|
|
|
|
|
|
2005-06-24 20:59:37 +04:00
|
|
|
typedef struct {
|
2005-07-03 20:06:07 +04:00
|
|
|
opal_object_t super;
|
2005-06-24 20:59:37 +04:00
|
|
|
/* index of this entry in array */
|
|
|
|
size_t index;
|
|
|
|
/* process name of the requestor - set to NULL if local */
|
|
|
|
orte_process_name_t *requestor;
|
|
|
|
/* requestor's id for this trigger */
|
|
|
|
orte_gpr_trigger_id_t idtag;
|
|
|
|
} orte_gpr_replica_trigger_requestor_t;
|
|
|
|
|
|
|
|
OBJ_CLASS_DECLARATION(orte_gpr_replica_trigger_requestor_t);
|
|
|
|
|
|
|
|
|
|
|
|
struct orte_gpr_replica_trigger_t {
|
2005-07-03 20:06:07 +04:00
|
|
|
opal_object_t super; /**< Make this an object */
|
2005-06-24 20:59:37 +04:00
|
|
|
/* name of this trigger, if provided */
|
|
|
|
char *name;
|
|
|
|
/* index of this trigger in the triggers array - corresponds to local idtag */
|
2005-05-01 04:53:00 +04:00
|
|
|
size_t index;
|
2005-06-24 20:59:37 +04:00
|
|
|
/* array of requestors that have "attached" themselves to this trigger */
|
|
|
|
size_t num_attached;
|
|
|
|
orte_pointer_array_t *attached;
|
2005-07-18 22:49:00 +04:00
|
|
|
/* the "master" requestor - if someone asks to have all
|
|
|
|
* output routed through them, we record their info here
|
|
|
|
* so we can comply
|
|
|
|
*/
|
|
|
|
orte_gpr_replica_trigger_requestor_t *master;
|
2005-06-24 20:59:37 +04:00
|
|
|
/* the action that causes the trigger to be fired */
|
2005-07-20 22:07:46 +04:00
|
|
|
orte_gpr_trigger_action_t action;
|
Add a job_info segment to the system that holds a container for each job. Within each container is a keyval indicating the job state (i.e., all procs at stage1, finalized, etc.). This provides a rough state-of-health for the job.
This required a little fiddling with a number of areas. Biggest problem was that it uncovered a potential for an infinite loop to be created in the registry. If a callback function modified the registry, the registry checked the triggers to see if anything had fired. Well, if the original callback was due to a trigger firing, that condition hadn't changed - so the trigger fired again....which caused the callback to be called, which modified the registry, which checked the triggers, etc. etc.
Triggers are now checked and then "flagged" as being "in process" so that the registry will NOT recheck that trigger until all callbacks have been processed. Tried doing this with subscriptions as well, but that caused a problem - when we release processes from a stagegate, they (at the moment) immediately place data on the registry that should cause a subscription to fire. Unfortunately, the system will just hang if that subscription doesn't get processed. So, I have left the subscription system alone - any callback function that modifies the registry in a fashion that will fire a subscription will indeed fire that subscription. We'll have to see if this causes problems - it shouldn't, but a careless user could lock things up if the callback generates a callback to itself.
Also fixed the code that placed a process' RML contact info on the registry to eliminate the leading '/' from the string.
This commit was SVN r6684.
2005-07-29 18:11:19 +04:00
|
|
|
/* boolean indicating that this trigger is already being
|
|
|
|
* processed - required to prevent infinite loops should a
|
|
|
|
* callback function modify the registry
|
|
|
|
*/
|
|
|
|
bool processing;
|
2005-03-29 02:37:54 +04:00
|
|
|
/* flag that indicates this trigger is a one-shot, has fired and
|
|
|
|
* now should be cleaned up
|
|
|
|
*/
|
|
|
|
bool one_shot_fired;
|
2005-06-24 20:59:37 +04:00
|
|
|
/* pointers to the counters being monitored. This could
|
2005-03-14 23:57:21 +03:00
|
|
|
* be counters we are using ourselves, or could be counters being run by someone
|
2005-06-24 20:59:37 +04:00
|
|
|
* else. For those triggers that fire at a specified level (as opposed to
|
|
|
|
* comparing values in two or more counters), store the trigger level for
|
|
|
|
* each counter that we are monitoring until they reach a specified level.
|
2005-03-14 23:57:21 +03:00
|
|
|
*/
|
2005-05-01 04:53:00 +04:00
|
|
|
size_t num_counters;
|
2005-03-14 23:57:21 +03:00
|
|
|
orte_pointer_array_t *counters;
|
2005-06-24 20:59:37 +04:00
|
|
|
/* a pointer to the subscriptions associated with this trigger. These
|
|
|
|
* describe the data that will be returned when the trigger fires, and to
|
|
|
|
* whom and where it goes.
|
|
|
|
*/
|
|
|
|
size_t num_subscriptions;
|
|
|
|
orte_pointer_array_t *subscriptions;
|
2005-03-14 23:57:21 +03:00
|
|
|
};
|
2005-06-24 20:59:37 +04:00
|
|
|
typedef struct orte_gpr_replica_trigger_t orte_gpr_replica_trigger_t;
|
|
|
|
|
|
|
|
OBJ_CLASS_DECLARATION(orte_gpr_replica_trigger_t);
|
2004-11-17 01:53:33 +03:00
|
|
|
|
|
|
|
|
2005-06-08 23:40:38 +04:00
|
|
|
/*
|
|
|
|
* Action taken object - used to track what action was taken against what
|
|
|
|
* registry object during the course of a registry request. For example, if
|
|
|
|
* a PUT modifies an existing registry entry, then we store a pointer to that
|
|
|
|
* entry and a flag indicating that it was modified. This info is required for
|
|
|
|
* processing notification subscriptions.
|
|
|
|
*/
|
|
|
|
typedef struct {
|
2005-07-03 20:06:07 +04:00
|
|
|
opal_object_t super; /**< Make this an object */
|
2005-06-08 23:40:38 +04:00
|
|
|
orte_gpr_replica_action_t action;
|
|
|
|
orte_gpr_replica_segment_t *seg;
|
|
|
|
orte_gpr_replica_container_t *cptr;
|
|
|
|
orte_gpr_replica_itagval_t *iptr;
|
|
|
|
} orte_gpr_replica_action_taken_t;
|
|
|
|
|
2005-06-14 01:41:25 +04:00
|
|
|
OBJ_CLASS_DECLARATION(orte_gpr_replica_action_taken_t);
|
2005-06-08 23:40:38 +04:00
|
|
|
|
2004-11-20 22:12:43 +03:00
|
|
|
/*
|
2005-03-14 23:57:21 +03:00
|
|
|
* Callback list objects
|
2004-11-20 22:12:43 +03:00
|
|
|
*/
|
2005-03-14 23:57:21 +03:00
|
|
|
struct orte_gpr_replica_callbacks_t {
|
2005-07-03 20:22:16 +04:00
|
|
|
opal_list_item_t item;
|
2005-03-14 23:57:21 +03:00
|
|
|
orte_process_name_t *requestor;
|
2005-06-24 20:59:37 +04:00
|
|
|
orte_gpr_notify_message_t *message;
|
2005-03-14 23:57:21 +03:00
|
|
|
};
|
|
|
|
typedef struct orte_gpr_replica_callbacks_t orte_gpr_replica_callbacks_t;
|
2004-11-20 22:12:43 +03:00
|
|
|
|
2005-03-14 23:57:21 +03:00
|
|
|
OBJ_CLASS_DECLARATION(orte_gpr_replica_callbacks_t);
|
2004-11-20 22:12:43 +03:00
|
|
|
|
2005-03-14 23:57:21 +03:00
|
|
|
/** List of replicas that hold a stored entry.
|
|
|
|
* Each entry can have an arbitrary number of replicas that hold a copy
|
|
|
|
* of the entry. The GPR requires that each entry be replicated in at least
|
|
|
|
* two locations. This structure is used to create a linked list of
|
|
|
|
* replicas for the entry.
|
|
|
|
*
|
|
|
|
* THIS IS NOT IMPLEMENTED YET
|
2004-11-20 22:12:43 +03:00
|
|
|
*/
|
2005-03-14 23:57:21 +03:00
|
|
|
struct orte_gpr_replica_list_t {
|
2005-07-03 20:22:16 +04:00
|
|
|
opal_list_item_t item; /**< Allows this item to be placed on a list */
|
2005-03-14 23:57:21 +03:00
|
|
|
orte_process_name_t *replica; /**< Name of the replica */
|
|
|
|
};
|
|
|
|
typedef struct orte_gpr_replica_list_t orte_gpr_replica_list_t;
|
2004-11-20 22:12:43 +03:00
|
|
|
|
2005-03-14 23:57:21 +03:00
|
|
|
OBJ_CLASS_DECLARATION(orte_gpr_replica_list_t);
|
2004-11-20 22:12:43 +03:00
|
|
|
|
2005-03-14 23:57:21 +03:00
|
|
|
/** Write invalidate structure.
|
|
|
|
* The structure used to indicate that an entry has been updated somewhere else in the GPR.
|
|
|
|
* The structure contains a flag indicating that the locally stored copy of the entry
|
|
|
|
* is no longer valid, a time tag indicating the time of the last known modification
|
|
|
|
* of the entry within the global registry, and the replica holding the last known
|
|
|
|
* up-to-date version of the entry.
|
|
|
|
*
|
|
|
|
* THIS IS NOT IMPLEMENTED YET
|
2004-11-20 22:12:43 +03:00
|
|
|
*/
|
2005-03-14 23:57:21 +03:00
|
|
|
struct orte_gpr_replica_write_invalidate_t {
|
|
|
|
bool invalidate;
|
|
|
|
time_t last_mod;
|
|
|
|
orte_process_name_t *valid_replica;
|
|
|
|
};
|
|
|
|
typedef struct orte_gpr_replica_write_invalidate_t orte_gpr_replica_write_invalidate_t;
|
2004-11-17 01:53:33 +03:00
|
|
|
|
|
|
|
|
2004-11-20 22:12:43 +03:00
|
|
|
/*
|
2005-03-14 23:57:21 +03:00
|
|
|
* globals needed within component
|
2004-11-20 22:12:43 +03:00
|
|
|
*/
|
2005-03-14 23:57:21 +03:00
|
|
|
extern orte_gpr_replica_t orte_gpr_replica;
|
|
|
|
extern orte_gpr_replica_globals_t orte_gpr_replica_globals;
|
2004-11-17 01:53:33 +03:00
|
|
|
|
2004-11-20 22:12:43 +03:00
|
|
|
|
|
|
|
/*
 * Module open / close
 *
 * Both take no arguments and return an int.
 * NOTE(review): the return value is presumably an ORTE status code -
 * confirm against the definitions.
 */
int orte_gpr_replica_open(void);
int orte_gpr_replica_close(void);
|
2004-11-20 22:12:43 +03:00
|
|
|
|
|
|
|
|
|
|
|
/*
|
2005-03-14 23:57:21 +03:00
|
|
|
* Startup / Shutdown
|
2004-11-20 22:12:43 +03:00
|
|
|
*/
|
2005-03-14 23:57:21 +03:00
|
|
|
orte_gpr_base_module_t *orte_gpr_replica_init(bool *allow_multi_user_threads, bool *have_hidden_threads, int *priority);
|
|
|
|
int orte_gpr_replica_finalize(void);
|
|
|
|
int orte_gpr_replica_module_init(void);
|
2005-05-12 00:21:10 +04:00
|
|
|
|
|
|
|
#if defined(c_plusplus) || defined(__cplusplus)
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2004-08-15 07:33:13 +04:00
|
|
|
#endif
|