1
1

Hey, sports fans!! Guess what??

Here's the huge registry check-in you've all been waiting for with bated breath. The revised version sends a single message to all processes at the various stage gates, thus making the startup much more scalable. I could provide you with all the tawdry details, but won't for now - you are welcome to ask, though, and I'll merrily bore your ears to tears.

In addition, the commit contains the following:

1. set the ignore properties on ompi/debuggers and orte/mca/pls/poe

2. Added simplified subscribe and put functions to the registry's API. I have also converted all of the ompi functions that registered subscriptions to the new API, and caught their associated puts as well.

In a follow-on commit, I'll be adding support for George's hetero arch registry subscription (wanted to get this one in first).

This commit was SVN r7118.
Этот коммит содержится в:
Ralph Castain 2005-09-01 01:07:30 +00:00
родитель 4ac2445c61
Коммит 96f4bb7a63
51 изменённых файлов: 3040 добавлений и 2494 удалений

Просмотреть файл

@ -104,9 +104,8 @@ static int set_f(int keyval, MPI_Fint value);
int ompi_attr_create_predefined(void)
{
int rc, ret;
orte_gpr_trigger_t trig, *trig1;
orte_gpr_value_t value, *values;
orte_gpr_subscription_t sub, *sub1;
orte_gpr_subscription_id_t id;
char *sub_name, *trig_name;
orte_jobid_t job;
/* Create all the keyvals */
@ -177,99 +176,40 @@ int ompi_attr_create_predefined(void)
return rc;
}
OBJ_CONSTRUCT(&sub, orte_gpr_subscription_t);
/* indicate that this is a standard subscription. This indicates that the
* subscription will be common to all processes. Thus, the resulting data
* can be consolidated into a process-independent message and broadcast
* to all processes
*/
if (ORTE_SUCCESS != (rc = orte_schema.get_std_subscription_name(&(sub.name),
OMPI_ATTRIBUTE_SUBSCRIPTION, job))) {
/* indicate that this is a standard subscription. This indicates
that the subscription will be common to all processes. Thus,
the resulting data can be consolidated into a
process-independent message and broadcast to all processes */
if (ORTE_SUCCESS !=
(rc = orte_schema.get_std_subscription_name(&sub_name,
OMPI_ATTRIBUTE_SUBSCRIPTION, job))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* send data when trigger fires, then delete -
* no need for further notifications
*/
sub.action = ORTE_GPR_NOTIFY_DELETE_AFTER_TRIG;
OBJ_CONSTRUCT(&value, orte_gpr_value_t);
values = &value;
sub.values = &values;
sub.cnt = 1;
value.addr_mode = ORTE_GPR_TOKENS_OR | ORTE_GPR_KEYS_OR;
value.segment = strdup(ORTE_NODE_SEGMENT);
if (NULL == value.segment) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
OBJ_DESTRUCT(&value);
sub.values = NULL;
OBJ_DESTRUCT(&sub);
return ORTE_ERR_OUT_OF_RESOURCE;
}
value.tokens = NULL; /* wildcard - look at all containers */
value.num_tokens = 0;
value.cnt = 1;
value.keyvals = (orte_gpr_keyval_t**)malloc(sizeof(orte_gpr_keyval_t*));
if (NULL == value.keyvals) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
OBJ_DESTRUCT(&value);
sub.values = NULL;
OBJ_DESTRUCT(&sub);
return ORTE_ERR_OUT_OF_RESOURCE;
}
value.keyvals[0] = OBJ_NEW(orte_gpr_keyval_t);
if (NULL == value.keyvals[0]) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
OBJ_DESTRUCT(&value);
sub.values = NULL;
OBJ_DESTRUCT(&sub);
return ORTE_ERR_OUT_OF_RESOURCE;
}
value.keyvals[0]->key = strdup(ORTE_NODE_SLOTS_KEY);
if (NULL == value.keyvals[0]->key) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
OBJ_DESTRUCT(&value);
sub.values = NULL;
OBJ_DESTRUCT(&sub);
return ORTE_ERR_OUT_OF_RESOURCE;
}
sub.cbfunc = ompi_attr_create_predefined_callback;
sub.user_tag = NULL;
/* setup the trigger information */
OBJ_CONSTRUCT(&trig, orte_gpr_trigger_t);
if (ORTE_SUCCESS != (rc = orte_schema.get_std_trigger_name(&(trig.name),
ORTE_STG1_TRIGGER, job))) {
/* attach ourselves to the standard stage-1 trigger */
if (ORTE_SUCCESS !=
(rc = orte_schema.get_std_trigger_name(&trig_name,
ORTE_STG1_TRIGGER, job))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&value);
sub.values = NULL;
OBJ_DESTRUCT(&sub);
free(sub_name);
return rc;
}
/* this is an ORTE-standard trigger that is defined by the ORTE resource manager
* when the job was launched - therefore, we don't need to provide any additional
* info
*/
/* do the subscription */
sub1 = ⊂
trig1 = &trig;
rc = orte_gpr.subscribe(1, &sub1, 1, &trig1);
if(ORTE_SUCCESS != rc) {
opal_output(0, "ompi_attr_create_predefined: subscribe failed");
OBJ_DESTRUCT(&value);
sub.values = NULL;
OBJ_DESTRUCT(&sub);
OBJ_DESTRUCT(&trig);
return OMPI_ERROR;
if (ORTE_SUCCESS != (rc = orte_gpr.subscribe_1(&id, trig_name, sub_name,
ORTE_GPR_NOTIFY_DELETE_AFTER_TRIG,
ORTE_GPR_TOKENS_OR | ORTE_GPR_KEYS_OR,
ORTE_NODE_SEGMENT,
NULL, /* wildcard - look at all containers */
ORTE_NODE_SLOTS_KEY,
ompi_attr_create_predefined_callback, NULL))) {
ORTE_ERROR_LOG(rc);
}
OBJ_DESTRUCT(&value);
sub.values = NULL;
OBJ_DESTRUCT(&sub);
OBJ_DESTRUCT(&trig);
return OMPI_SUCCESS;
free(trig_name);
free(sub_name);
return rc;
}

Просмотреть файл

@ -380,8 +380,8 @@ opal_output(0, "[%lu,%lu,%lu] mca_pml_base_modex_registry_callback: %s-%s-%d-%d
static int mca_pml_base_modex_subscribe(orte_process_name_t* name)
{
orte_gpr_trigger_t trig, *trigs;
orte_gpr_subscription_t sub, *subs;
char *segment, *sub_name, *trig_name;
orte_gpr_subscription_id_t sub_id;
orte_jobid_t jobid;
opal_list_item_t* item;
mca_pml_base_modex_subscription_t* subscription;
@ -408,111 +408,55 @@ static int mca_pml_base_modex_subscribe(orte_process_name_t* name)
return rc;
}
/* setup the subscription definition */
OBJ_CONSTRUCT(&sub, orte_gpr_subscription_t);
/* indicate that this is a standard subscription. This indicates that the
* subscription will be common to all processes. Thus, the resulting data
* can be consolidated into a process-independent message and broadcast
* to all processes
*/
if (ORTE_SUCCESS != (rc = orte_schema.get_std_subscription_name(&(sub.name),
if (ORTE_SUCCESS != (rc = orte_schema.get_std_subscription_name(&sub_name,
OMPI_MODEX_SUBSCRIPTION, jobid))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* send data when trigger fires, continue to monitor. The default
* action for any subscription that includes a trigger condition is
* to send the specified data when the trigger fires. This set of flags
* indicates that - AFTER the trigger fires - the subscription should
* continue to send data any time an entry is added or changed.
*/
sub.action = ORTE_GPR_NOTIFY_ADD_ENTRY |
ORTE_GPR_NOTIFY_VALUE_CHG |
ORTE_GPR_NOTIFY_STARTS_AFTER_TRIG;
/* setup the value structures that describe the data to
* be monitored and returned by this subscription
*/
sub.cnt = 1;
sub.values = (orte_gpr_value_t**)malloc(sizeof(orte_gpr_value_t*));
if (NULL == sub.values) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
sub.values[0] = OBJ_NEW(orte_gpr_value_t);
if (NULL == sub.values[0]) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
sub.cnt = 1;
/* define the segment */
if (ORTE_SUCCESS != (rc = orte_schema.get_job_segment_name(
&(sub.values[0]->segment), jobid))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&sub);
return rc;
}
sub.values[0]->addr_mode = ORTE_GPR_KEYS_OR | ORTE_GPR_TOKENS_OR;
/* look at all containers on this segment */
sub.values[0]->tokens = NULL;
sub.values[0]->num_tokens = 0;
/* look for any keyval with "modex" key */
sub.values[0]->cnt = 1;
sub.values[0]->keyvals = (orte_gpr_keyval_t**)malloc(sizeof(orte_gpr_keyval_t*));
if (NULL == sub.values[0]->keyvals) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
OBJ_DESTRUCT(&sub);
return ORTE_ERR_OUT_OF_RESOURCE;
}
sub.values[0]->keyvals[0] = OBJ_NEW(orte_gpr_keyval_t);
if (NULL == sub.values[0]->keyvals[0]) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
OBJ_DESTRUCT(&sub);
return ORTE_ERR_OUT_OF_RESOURCE;
}
sub.values[0]->keyvals[0]->key = strdup("modex");
if (NULL == sub.values[0]->keyvals[0]->key) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
OBJ_DESTRUCT(&sub);
return ORTE_ERR_OUT_OF_RESOURCE;
}
/* define the callback function */
sub.cbfunc = mca_pml_base_modex_registry_callback;
sub.user_tag = NULL;
/* setup the trigger definition */
OBJ_CONSTRUCT(&trig, orte_gpr_trigger_t);
if (ORTE_SUCCESS != (rc = orte_schema.get_std_trigger_name(&(trig.name),
/* attach to the stage-1 standard trigger */
if (ORTE_SUCCESS != (rc = orte_schema.get_std_trigger_name(&trig_name,
ORTE_STG1_TRIGGER, jobid))) {
ORTE_ERROR_LOG(rc);
free(sub_name);
return rc;
}
/* this is an ORTE-standard trigger that is defined by the ORTE resource manager
* when the job was launched - therefore, we don't need to provide any additional
* info
*/
/* register the subscription */
subs = ⊂
trigs = &trig;
rc = orte_gpr.subscribe(1, &subs, 1, &trigs);
if(ORTE_SUCCESS != rc) {
opal_output(0, "mca_pml_base_modex_exchange: "
"orte_gpr.subscribe failed with return code %d\n", rc);
OBJ_DESTRUCT(&sub);
OBJ_DESTRUCT(&trig);
return OMPI_ERROR;
/* define the segment */
if (ORTE_SUCCESS != (rc = orte_schema.get_job_segment_name(&segment, jobid))) {
ORTE_ERROR_LOG(rc);
free(sub_name);
free(trig_name);
return rc;
}
if (ORTE_SUCCESS != (rc = orte_gpr.subscribe_1(&sub_id, trig_name, sub_name,
ORTE_GPR_NOTIFY_ADD_ENTRY |
ORTE_GPR_NOTIFY_VALUE_CHG |
ORTE_GPR_NOTIFY_STARTS_AFTER_TRIG,
ORTE_GPR_KEYS_OR | ORTE_GPR_TOKENS_OR,
segment,
NULL, /* look at all containers on this segment */
"modex",
mca_pml_base_modex_registry_callback, NULL))) {
ORTE_ERROR_LOG(rc);
opal_output(0, "mca_pml_base_modex_exchange: "
"orte_gpr.subscribe failed with return code %d\n", rc);
free(sub_name);
free(trig_name);
free(segment);
return rc;
}
free(sub_name);
free(trig_name);
free(segment);
/* add this jobid to our list of subscriptions */
OPAL_LOCK(&mca_pml_base_modex_lock);
subscription = OBJ_NEW(mca_pml_base_modex_subscription_t);
subscription->jobid = name->jobid;
opal_list_append(&mca_pml_base_modex_subscriptions, &subscription->item);
OPAL_UNLOCK(&mca_pml_base_modex_lock);
OBJ_DESTRUCT(&sub);
OBJ_DESTRUCT(&trig);
return OMPI_SUCCESS;
}

Просмотреть файл

@ -25,6 +25,7 @@
#include "orte/mca/oob/oob.h"
#include "orte/mca/ns/ns.h"
#include "orte/mca/gpr/gpr.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/util/proc_info.h"
#include "ompi/proc/proc.h"
#include "ompi/mca/pml/pml.h"
@ -336,15 +337,14 @@ int ompi_proc_get_proclist (orte_buffer_t* buf, int proclistsize, ompi_proc_t **
static int setup_registry_callback(void)
{
int rc;
char *segment;
char *segment, *sub_name, *trig_name, *keys[2];
ompi_proc_t *local = ompi_proc_local();
orte_gpr_subscription_id_t id;
orte_jobid_t jobid;
orte_gpr_trigger_t trig, *trig1;
orte_gpr_value_t value, *values;
orte_gpr_subscription_t sub, *sub1;
if (ORTE_SUCCESS != orte_ns.get_jobid(&jobid, &local->proc_name)) {
printf("Badness!\n");
if (ORTE_SUCCESS != (rc = orte_ns.get_jobid(&jobid, &local->proc_name))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* find the job segment on the registry */
@ -353,88 +353,52 @@ static int setup_registry_callback(void)
return rc;
}
OBJ_CONSTRUCT(&sub, orte_gpr_subscription_t);
/* indicate that this is a standard subscription. This indicates
that the subscription will be common to all processes. Thus,
the resulting data can be consolidated into a
process-independent message and broadcast to all processes */
if (ORTE_SUCCESS !=
(rc = orte_schema.get_std_subscription_name(&(sub.name),
(rc = orte_schema.get_std_subscription_name(&sub_name,
OMPI_PROC_SUBSCRIPTION, jobid))) {
ORTE_ERROR_LOG(rc);
free(segment);
return rc;
}
/* send data when trigger fires, then delete - no need for further
notifications */
sub.action = ORTE_GPR_NOTIFY_DELETE_AFTER_TRIG;
OBJ_CONSTRUCT(&value, orte_gpr_value_t);
values = &value;
sub.values = &values;
sub.cnt = 1;
value.addr_mode = ORTE_GPR_TOKENS_OR | ORTE_GPR_KEYS_OR;
value.segment = segment;
value.tokens = NULL; /* wildcard - look at all containers */
value.num_tokens = 0;
value.cnt = 2;
value.keyvals =
(orte_gpr_keyval_t**)malloc(sizeof(orte_gpr_keyval_t*) * 2);
if (NULL == value.keyvals) {
rc = ORTE_ERR_OUT_OF_RESOURCE;
goto cleanup;
}
value.keyvals[0] = NULL;
value.keyvals[1] = NULL;
value.keyvals[0] = OBJ_NEW(orte_gpr_keyval_t);
if (NULL == value.keyvals[0]) {
rc = ORTE_ERR_OUT_OF_RESOURCE;
goto cleanup;
}
value.keyvals[0]->key = strdup(ORTE_PROC_NAME_KEY);
if (NULL == value.keyvals[0]->key) {
rc = ORTE_ERR_OUT_OF_RESOURCE;
goto cleanup;
}
value.keyvals[1] = OBJ_NEW(orte_gpr_keyval_t);
if (NULL == value.keyvals[0]) {
rc = ORTE_ERR_OUT_OF_RESOURCE;
goto cleanup;
}
value.keyvals[1]->key = strdup(ORTE_NODE_NAME_KEY);
if (NULL == value.keyvals[0]->key) {
rc = ORTE_ERR_OUT_OF_RESOURCE;
goto cleanup;
}
/* define the keys to be returned */
keys[0] = strdup(ORTE_PROC_NAME_KEY);
keys[1] = strdup(ORTE_NODE_NAME_KEY);
/* Here we have to add another key to the registry to be able to get the information
* about the remote architectures.
* TODO: George.
*/
sub.cbfunc = callback;
sub.user_tag = NULL;
/* setup the trigger information */
OBJ_CONSTRUCT(&trig, orte_gpr_trigger_t);
/* attach ourselves to the standard stage-1 trigger */
if (ORTE_SUCCESS !=
(rc = orte_schema.get_std_trigger_name(&(trig.name),
(rc = orte_schema.get_std_trigger_name(&trig_name,
ORTE_STG1_TRIGGER, jobid))) {
goto cleanup;
ORTE_ERROR_LOG(rc);
goto CLEANUP;
}
/* do the subscription */
sub1 = ⊂
trig1 = &trig;
rc = orte_gpr.subscribe(1, &sub1, 1, &trig1);
if (ORTE_SUCCESS != (rc = orte_gpr.subscribe_N(&id, trig_name, sub_name,
ORTE_GPR_NOTIFY_DELETE_AFTER_TRIG,
ORTE_GPR_TOKENS_OR | ORTE_GPR_KEYS_OR,
segment,
NULL, /* wildcard - look at all containers */
2, keys,
callback, NULL))) {
ORTE_ERROR_LOG(rc);
}
free(trig_name);
CLEANUP:
free(segment);
free(sub_name);
free(keys[0]);
free(keys[1]);
cleanup:
OBJ_DESTRUCT(&value);
sub.values = NULL;
OBJ_DESTRUCT(&sub);
OBJ_DESTRUCT(&trig);
return rc;
}

Просмотреть файл

@ -93,7 +93,8 @@ int ompi_mpi_finalize(void)
/*
* Wait for everyone to get here
*/
if (ORTE_SUCCESS != (ret = orte_rml.xcast(NULL, NULL, 0, NULL, NULL))) {
if (ORTE_SUCCESS != (ret = orte_rml.xcast(NULL, NULL, 0, NULL,
orte_gpr.deliver_notify_msg, NULL))) {
ORTE_ERROR_LOG(ret);
return ret;
}
@ -219,7 +220,8 @@ int ompi_mpi_finalize(void)
* the RTE while the soh is trying to do the update - which causes
* an ugly race condition
*/
if (ORTE_SUCCESS != (ret = orte_rml.xcast(NULL, NULL, 0, NULL, NULL))) {
if (ORTE_SUCCESS != (ret = orte_rml.xcast(NULL, NULL, 0, NULL,
orte_gpr.deliver_notify_msg, NULL))) {
ORTE_ERROR_LOG(ret);
return ret;
}

Просмотреть файл

@ -179,7 +179,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
}
}
#ifndef WIN32
#if 0
if (OMPI_SUCCESS != (ret = opal_util_register_stackhandlers ())) {
error = "util_register_stackhandlers() failed";
goto error;
@ -363,7 +363,8 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
}
/* FIRST BARRIER - WAIT FOR MSG FROM RMGR_PROC_STAGE_GATE_MGR TO ARRIVE */
if (ORTE_SUCCESS != (ret = orte_rml.xcast(NULL, NULL, 0, NULL, NULL))) {
if (ORTE_SUCCESS != (ret = orte_rml.xcast(NULL, NULL, 0, NULL,
orte_gpr.deliver_notify_msg, NULL))) {
ORTE_ERROR_LOG(ret);
error = "ompi_mpi_init: failed to see all procs register\n";
goto error;
@ -469,7 +470,8 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
/* Second barrier -- wait for message from
RMGR_PROC_STAGE_GATE_MGR to arrive */
if (ORTE_SUCCESS != (ret = orte_rml.xcast(NULL, NULL, 0, NULL, NULL))) {
if (ORTE_SUCCESS != (ret = orte_rml.xcast(NULL, NULL, 0, NULL,
orte_gpr.deliver_notify_msg, NULL))) {
ORTE_ERROR_LOG(ret);
error = "ompi_mpi_init: failed to see all procs register\n";
goto error;

Просмотреть файл

@ -90,12 +90,13 @@ typedef uint8_t orte_data_type_t ;
#define ORTE_GPR_TRIGGER (orte_data_type_t) 44 /**< describes trigger conditions */
#define ORTE_GPR_NOTIFY_DATA (orte_data_type_t) 45 /**< data returned from a subscription */
#define ORTE_GPR_NOTIFY_MSG (orte_data_type_t) 46 /**< notify message containing notify_data objects */
#define ORTE_GPR_NOTIFY_MSG_TYPE (orte_data_type_t) 47 /**< notify message type (subscription or trigger) */
/* Resource Manager types */
#define ORTE_APP_CONTEXT (orte_data_type_t) 47 /**< argv and enviro arrays */
#define ORTE_APP_CONTEXT_MAP (orte_data_type_t) 48 /**< application context mapping array */
#define ORTE_APP_CONTEXT (orte_data_type_t) 48 /**< argv and enviro arrays */
#define ORTE_APP_CONTEXT_MAP (orte_data_type_t) 49 /**< application context mapping array */
/* define the starting point for dynamically assigning data types */
#define ORTE_DPS_ID_DYNAMIC 50
#define ORTE_DPS_ID_DYNAMIC 60
/* define a structure to hold generic byte objects */
typedef struct {
@ -103,12 +104,4 @@ typedef struct {
uint8_t *bytes;
} orte_byte_object_t;
/* define a print format to handle the variations in pid_t */
#if SIZEOF_PID_T == SIZEOF_INT
#define ORTE_PID_T_PRINTF "%u"
#elif SIZEOF_PID_T == SIZEOF_LONG
#define ORTE_PID_T_PRINTF "%lu"
#endif
#endif

Просмотреть файл

@ -85,27 +85,29 @@ extern "C" {
/*
* Define flag values for remote commands
*/
#define ORTE_GPR_DELETE_SEGMENT_CMD (uint8_t) 1
#define ORTE_GPR_PUT_CMD (uint8_t) 2
#define ORTE_GPR_DELETE_ENTRIES_CMD (uint8_t) 3
#define ORTE_GPR_INDEX_CMD (uint8_t) 4
#define ORTE_GPR_SUBSCRIBE_CMD (uint8_t) 5
#define ORTE_GPR_UNSUBSCRIBE_CMD (uint8_t) 6
#define ORTE_GPR_CANCEL_TRIGGER_CMD (uint8_t) 7
#define ORTE_GPR_GET_CMD (uint8_t) 8
#define ORTE_GPR_TEST_INTERNALS_CMD (uint8_t) 9
#define ORTE_GPR_NOTIFY_CMD (uint8_t) 10
#define ORTE_GPR_DUMP_ALL_CMD (uint8_t) 11
#define ORTE_GPR_DUMP_SEGMENTS_CMD (uint8_t) 12
#define ORTE_GPR_DUMP_TRIGGERS_CMD (uint8_t) 13
#define ORTE_GPR_DUMP_SUBSCRIPTIONS_CMD (uint8_t) 14
#define ORTE_GPR_DUMP_CALLBACKS_CMD (uint8_t) 15
#define ORTE_GPR_INCREMENT_VALUE_CMD (uint8_t) 16
#define ORTE_GPR_DECREMENT_VALUE_CMD (uint8_t) 17
#define ORTE_GPR_COMPOUND_CMD (uint8_t) 18
#define ORTE_GPR_CLEANUP_JOB_CMD (uint8_t) 19
#define ORTE_GPR_CLEANUP_PROC_CMD (uint8_t) 20
#define ORTE_GPR_ERROR (uint8_t)0xff
#define ORTE_GPR_DELETE_SEGMENT_CMD (uint8_t) 1
#define ORTE_GPR_PUT_CMD (uint8_t) 2
#define ORTE_GPR_DELETE_ENTRIES_CMD (uint8_t) 3
#define ORTE_GPR_INDEX_CMD (uint8_t) 4
#define ORTE_GPR_SUBSCRIBE_CMD (uint8_t) 5
#define ORTE_GPR_UNSUBSCRIBE_CMD (uint8_t) 6
#define ORTE_GPR_CANCEL_TRIGGER_CMD (uint8_t) 7
#define ORTE_GPR_GET_CMD (uint8_t) 8
#define ORTE_GPR_TEST_INTERNALS_CMD (uint8_t) 9
#define ORTE_GPR_NOTIFY_CMD (uint8_t) 10
#define ORTE_GPR_DUMP_ALL_CMD (uint8_t) 11
#define ORTE_GPR_DUMP_SEGMENTS_CMD (uint8_t) 12
#define ORTE_GPR_DUMP_TRIGGERS_CMD (uint8_t) 13
#define ORTE_GPR_DUMP_SUBSCRIPTIONS_CMD (uint8_t) 14
#define ORTE_GPR_DUMP_CALLBACKS_CMD (uint8_t) 15
#define ORTE_GPR_INCREMENT_VALUE_CMD (uint8_t) 16
#define ORTE_GPR_DECREMENT_VALUE_CMD (uint8_t) 17
#define ORTE_GPR_COMPOUND_CMD (uint8_t) 18
#define ORTE_GPR_CLEANUP_JOB_CMD (uint8_t) 19
#define ORTE_GPR_CLEANUP_PROC_CMD (uint8_t) 20
#define ORTE_GPR_DUMP_A_TRIGGER_CMD (uint8_t) 21
#define ORTE_GPR_DUMP_A_SUBSCRIPTION_CMD (uint8_t) 22
#define ORTE_GPR_ERROR (uint8_t)0xff
typedef uint8_t orte_gpr_cmd_flag_t;
#define ORTE_GPR_CMD_T ORTE_UINT8
@ -162,14 +164,26 @@ typedef uint8_t orte_gpr_cmd_flag_t;
orte_gpr_trigger_cb_fn_t cbfunc,
void *user_tag);
OMPI_DECLSPEC int orte_gpr_base_define_trigger_level(orte_gpr_trigger_id_t *id,
char *trig_name,
orte_gpr_trigger_action_t action,
orte_gpr_addr_mode_t addr_mode,
char *segment,
char **tokens,
size_t n,
char **keys,
size_t *levels,
orte_gpr_trigger_cb_fn_t cbfunc,
void *user_tag);
/* general usage functions */
OMPI_DECLSPEC int orte_gpr_base_pack_delete_segment(orte_buffer_t *cmd,
char *segment);
OMPI_DECLSPEC int orte_gpr_base_unpack_delete_segment(orte_buffer_t *buffer, int *ret);
OMPI_DECLSPEC int orte_gpr_base_pack_delete_entries(orte_buffer_t *buffer,
orte_gpr_addr_mode_t mode,
char *segment, char **tokens, char **keys);
orte_gpr_addr_mode_t mode,
char *segment, char **tokens, char **keys);
OMPI_DECLSPEC int orte_gpr_base_unpack_delete_entries(orte_buffer_t *buffer, int *ret);
OMPI_DECLSPEC int orte_gpr_base_pack_index(orte_buffer_t *cmd, char *segment);
@ -183,7 +197,7 @@ typedef uint8_t orte_gpr_cmd_flag_t;
OMPI_DECLSPEC int orte_gpr_base_unpack_subscribe(orte_buffer_t *buffer, int *ret);
OMPI_DECLSPEC int orte_gpr_base_pack_unsubscribe(orte_buffer_t *cmd,
orte_gpr_subscription_id_t id);
orte_gpr_subscription_id_t id);
OMPI_DECLSPEC int orte_gpr_base_unpack_unsubscribe(orte_buffer_t *buffer, int *ret);
OMPI_DECLSPEC int orte_gpr_base_pack_cancel_trigger(orte_buffer_t *cmd,
@ -191,19 +205,26 @@ typedef uint8_t orte_gpr_cmd_flag_t;
OMPI_DECLSPEC int orte_gpr_base_unpack_cancel_trigger(orte_buffer_t *buffer, int *ret);
OMPI_DECLSPEC int orte_gpr_base_pack_put(orte_buffer_t *cmd,
size_t cnt, orte_gpr_value_t **values);
size_t cnt, orte_gpr_value_t **values);
OMPI_DECLSPEC int orte_gpr_base_unpack_put(orte_buffer_t *buffer, int *ret);
OMPI_DECLSPEC int orte_gpr_base_pack_get(orte_buffer_t *cmd,
orte_gpr_addr_mode_t mode,
char *segment, char **tokens, char **keys);
orte_gpr_addr_mode_t mode,
char *segment, char **tokens, char **keys);
OMPI_DECLSPEC int orte_gpr_base_unpack_get(orte_buffer_t *buffer, int *ret,
size_t *cnt, orte_gpr_value_t ***values);
OMPI_DECLSPEC int orte_gpr_base_pack_dump_all(orte_buffer_t *cmd);
OMPI_DECLSPEC int orte_gpr_base_pack_dump_segments(orte_buffer_t *cmd, char *segment);
OMPI_DECLSPEC int orte_gpr_base_pack_dump_triggers(orte_buffer_t *cmd);
OMPI_DECLSPEC int orte_gpr_base_pack_dump_subscriptions(orte_buffer_t *cmd);
OMPI_DECLSPEC int orte_gpr_base_pack_dump_triggers(orte_buffer_t *cmd,
orte_gpr_trigger_id_t start);
OMPI_DECLSPEC int orte_gpr_base_pack_dump_subscriptions(orte_buffer_t *cmd,
orte_gpr_subscription_id_t start);
OMPI_DECLSPEC int orte_gpr_base_pack_dump_a_trigger(orte_buffer_t *cmd,
char *name, orte_gpr_trigger_id_t id);
OMPI_DECLSPEC int orte_gpr_base_pack_dump_a_subscription(orte_buffer_t *cmd,
char *name,
orte_gpr_subscription_id_t id);
OMPI_DECLSPEC int orte_gpr_base_pack_dump_callbacks(orte_buffer_t *cmd);
OMPI_DECLSPEC int orte_gpr_base_print_dump(orte_buffer_t *buffer, int output_id);
OMPI_DECLSPEC void orte_gpr_base_dump_keyval_value(orte_buffer_t *buffer,
@ -246,6 +267,9 @@ int orte_gpr_base_pack_notify_action(orte_buffer_t *buffer, void *src,
int orte_gpr_base_pack_trigger_action(orte_buffer_t *buffer, void *src,
size_t num_vals, orte_data_type_t type);
int orte_gpr_base_pack_notify_msg_type(orte_buffer_t *buffer, void *src,
size_t num_vals, orte_data_type_t type);
int orte_gpr_base_pack_addr_mode(orte_buffer_t *buffer, void *src,
size_t num_vals, orte_data_type_t type);
@ -286,6 +310,9 @@ int orte_gpr_base_unpack_trigger_action(orte_buffer_t *buffer, void *dest,
int orte_gpr_base_unpack_addr_mode(orte_buffer_t *buffer, void *dest,
size_t *num_vals, orte_data_type_t type);
int orte_gpr_base_unpack_notify_msg_type(orte_buffer_t *buffer, void *dest,
size_t *num_vals, orte_data_type_t type);
int orte_gpr_base_unpack_keyval(orte_buffer_t *buffer, void *dest,
size_t *num_vals, orte_data_type_t type);

Просмотреть файл

@ -116,6 +116,22 @@ int orte_gpr_base_pack_addr_mode(orte_buffer_t *buffer, void *src,
return rc;
}
/*
* NOTIFY MSG TYPE
*/
/*
 * Pack num_vals notify-message-type values from src into buffer.
 *
 * The work is delegated to orte_dps_pack_buffer() using the
 * ORTE_GPR_NOTIFY_MSG_TYPE_T data type; the "type" argument is not
 * consulted. Any failure is logged and its code is returned unchanged.
 */
int orte_gpr_base_pack_notify_msg_type(orte_buffer_t *buffer, void *src,
                                       size_t num_vals, orte_data_type_t type)
{
    int status = orte_dps_pack_buffer(buffer, src, num_vals,
                                      ORTE_GPR_NOTIFY_MSG_TYPE_T);

    if (ORTE_SUCCESS == status) {
        return status;
    }

    ORTE_ERROR_LOG(status);
    return status;
}
/*
* KEYVAL
*/
@ -347,7 +363,7 @@ int orte_gpr_base_pack_notify_data(orte_buffer_t *buffer, void *src,
/* pack the subscription name */
if (ORTE_SUCCESS != (rc = orte_dps_pack_buffer(buffer,
(void*)(&(data[i]->name)), 1, ORTE_STRING))) {
(void*)(&(data[i]->target)), 1, ORTE_STRING))) {
ORTE_ERROR_LOG(rc);
return rc;
}
@ -410,9 +426,16 @@ int orte_gpr_base_pack_notify_msg(orte_buffer_t *buffer, void *src,
for (i=0; i<num_vals; i++) {
/* pack the message type */
if (ORTE_SUCCESS != (rc = orte_dps_pack_buffer(buffer,
(void*)(&(msg[i]->msg_type)), 1, ORTE_GPR_NOTIFY_MSG_TYPE))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* pack the trigger name */
if (ORTE_SUCCESS != (rc = orte_dps_pack_buffer(buffer,
(void*)(&(msg[i]->name)), 1, ORTE_STRING))) {
(void*)(&(msg[i]->target)), 1, ORTE_STRING))) {
ORTE_ERROR_LOG(rc);
return rc;
}

Просмотреть файл

@ -101,6 +101,21 @@ int orte_gpr_base_unpack_trigger_action(orte_buffer_t *buffer, void *dest,
return rc;
}
/*
* NOTIFY MSG TYPE
*/
/*
 * Unpack notify-message-type values from buffer into dest.
 *
 * The work is delegated to orte_dps_unpack_buffer() using the
 * ORTE_GPR_NOTIFY_MSG_TYPE_T data type, with num_vals passed straight
 * through; the "type" argument is not consulted. Any failure is logged
 * and its code is returned unchanged.
 */
int orte_gpr_base_unpack_notify_msg_type(orte_buffer_t *buffer, void *dest,
                                         size_t *num_vals, orte_data_type_t type)
{
    int status = orte_dps_unpack_buffer(buffer, dest, num_vals,
                                        ORTE_GPR_NOTIFY_MSG_TYPE_T);

    if (ORTE_SUCCESS == status) {
        return status;
    }

    ORTE_ERROR_LOG(status);
    return status;
}
/*
* ADDR MODE
*/
@ -408,7 +423,7 @@ int orte_gpr_base_unpack_notify_data(orte_buffer_t *buffer, void *dest,
}
/* unpack the subscription name */
if (ORTE_SUCCESS != (rc = orte_dps_unpack_buffer(buffer, &(data[i]->name),
if (ORTE_SUCCESS != (rc = orte_dps_unpack_buffer(buffer, &(data[i]->target),
&max_n, ORTE_STRING))) {
ORTE_ERROR_LOG(rc);
return rc;
@ -479,8 +494,15 @@ int orte_gpr_base_unpack_notify_msg(orte_buffer_t *buffer, void *dest,
return ORTE_ERR_OUT_OF_RESOURCE;
}
/* unpack the message type */
if (ORTE_SUCCESS != (rc = orte_dps_unpack_buffer(buffer, &(msg[i]->msg_type),
&max_n, ORTE_GPR_NOTIFY_MSG_TYPE))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* unpack the trigger name */
if (ORTE_SUCCESS != (rc = orte_dps_unpack_buffer(buffer, &(msg[i]->name),
if (ORTE_SUCCESS != (rc = orte_dps_unpack_buffer(buffer, &(msg[i]->target),
&max_n, ORTE_STRING))) {
ORTE_ERROR_LOG(rc);
return rc;

Просмотреть файл

@ -112,7 +112,7 @@ static void orte_gpr_value_destructor(orte_gpr_value_t* reg_val)
if (NULL != reg_val->keyvals[i])
OBJ_RELEASE(reg_val->keyvals[i]);
}
free(reg_val->keyvals);
free(reg_val->keyvals);
}
if (0 < reg_val->num_tokens && NULL != reg_val->tokens) {
@ -127,17 +127,17 @@ static void orte_gpr_value_destructor(orte_gpr_value_t* reg_val)
/* define instance of opal_class_t */
OBJ_CLASS_INSTANCE(
orte_gpr_value_t, /* type name */
opal_object_t, /* parent "class" name */
orte_gpr_value_construct, /* constructor */
orte_gpr_value_destructor); /* destructor */
orte_gpr_value_t, /* type name */
opal_object_t, /* parent "class" name */
orte_gpr_value_construct, /* constructor */
orte_gpr_value_destructor); /* destructor */
/** NOTIFY DATA **/
/* constructor - used to initialize state of registry value instance */
static void orte_gpr_notify_data_construct(orte_gpr_notify_data_t* ptr)
{
ptr->name = NULL;
ptr->target = NULL;
ptr->id = ORTE_GPR_SUBSCRIPTION_ID_MAX;
ptr->remove = false;
ptr->cnt = 0;
@ -153,7 +153,7 @@ static void orte_gpr_notify_data_destructor(orte_gpr_notify_data_t* ptr)
size_t i, j;
orte_gpr_value_t **values;
if (NULL != ptr->name) free(ptr->name);
if (NULL != ptr->target) free(ptr->target);
if (NULL != ptr->values) {
values = (orte_gpr_value_t**)(ptr->values)->addr;
@ -251,7 +251,8 @@ OBJ_CLASS_INSTANCE(
/* constructor - used to initialize notify message instance */
static void orte_gpr_notify_message_construct(orte_gpr_notify_message_t* msg)
{
msg->name = NULL;
msg->msg_type = 0;
msg->target = NULL;
msg->id = ORTE_GPR_TRIGGER_ID_MAX;
msg->remove = false;
msg->cnt = 0;
@ -266,7 +267,7 @@ static void orte_gpr_notify_message_destructor(orte_gpr_notify_message_t* msg)
size_t i, j;
orte_gpr_notify_data_t **data;
if (NULL != msg->name) free(msg->name);
if (NULL != msg->target) free(msg->target);
if (NULL != msg->data) {
data = (orte_gpr_notify_data_t**)(msg->data)->addr;
@ -373,6 +374,14 @@ int orte_gpr_base_open(void)
return rc;
}
tmp = ORTE_GPR_NOTIFY_MSG_TYPE;
if (ORTE_SUCCESS != (rc = orte_dps.register_type(orte_gpr_base_pack_notify_msg_type,
orte_gpr_base_unpack_notify_msg_type,
"ORTE_GPR_NOTIFY_MSG_TYPE", &tmp))) {
ORTE_ERROR_LOG(rc);
return rc;
}
tmp = ORTE_GPR_ADDR_MODE;
if (ORTE_SUCCESS != (rc = orte_dps.register_type(orte_gpr_base_pack_addr_mode,
orte_gpr_base_unpack_addr_mode,

Просмотреть файл

@ -57,6 +57,7 @@ int orte_gpr_base_subscribe_1(orte_gpr_subscription_id_t *id,
/* assemble the subscription object */
subs = &sub;
sub.name = sub_name;
sub.action = action;
sub.cnt = 1;
values = &value;
@ -222,6 +223,16 @@ int orte_gpr_base_define_trigger(orte_gpr_trigger_id_t *id,
size_t i, j;
int rc;
/* check for error - this function can only be used to define triggers
* that compare their values to each other. It cannot be used to define
* triggers that fire when reaching a specified value as there is no
* way to specify a trigger level within this API
*/
if (ORTE_GPR_TRIG_AT_LEVEL & action) {
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
return ORTE_ERR_BAD_PARAM;
}
/* assemble the trigger object */
trigs = &trig;
trig.name = trig_name;
@ -281,3 +292,95 @@ int orte_gpr_base_define_trigger(orte_gpr_trigger_id_t *id,
return rc;
}
/*
 * Define a trigger that fires when the given keys reach specified levels.
 *
 * A single-value trigger object is assembled on the stack and registered
 * through orte_gpr.subscribe() with no accompanying subscriptions.
 *
 * id        - receives trig.id after the subscribe call (written even when
 *             the subscribe call reports an error)
 * trig_name - name stored in the trigger (not copied)
 * action    - trigger action flags; must NOT include ORTE_GPR_TRIG_CMP_LEVELS
 * addr_mode - addressing mode stored in the value
 * segment   - segment name stored in the value (not copied)
 * tokens    - NULL, or a NULL-terminated array of tokens (not copied)
 * n         - number of entries in both keys[] and levels[]
 * keys      - n key strings (not copied)
 * levels    - n trigger levels, one per key
 * cbfunc    - callback stored in the trigger
 * user_tag  - opaque tag stored in the trigger
 *
 * Returns ORTE_ERR_BAD_PARAM if action requests level comparison,
 * ORTE_ERR_OUT_OF_RESOURCE on allocation failure, otherwise the return
 * code of orte_gpr.subscribe().
 */
int orte_gpr_base_define_trigger_level(orte_gpr_trigger_id_t *id,
                                       char *trig_name,
                                       orte_gpr_trigger_action_t action,
                                       orte_gpr_addr_mode_t addr_mode,
                                       char *segment,
                                       char **tokens,
                                       size_t n,
                                       char **keys,
                                       size_t *levels,
                                       orte_gpr_trigger_cb_fn_t cbfunc,
                                       void *user_tag)
{
    orte_gpr_value_t *values;
    orte_gpr_value_t value = { {OBJ_CLASS(opal_object_t),0},
                               ORTE_GPR_TOKENS_AND,
                               NULL, 0, NULL, 0, NULL };
    orte_gpr_trigger_t *trigs;
    orte_gpr_trigger_t trig = { {OBJ_CLASS(opal_object_t),0},
                                NULL, 0, 0, 0, NULL, 0, NULL };
    size_t i, j;
    int rc;

    /* check for error - this function can only be used to define triggers
     * that fire at a specified level. It cannot be used to define
     * triggers that compare their values to each other
     */
    if (ORTE_GPR_TRIG_CMP_LEVELS & action) {
        ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
        return ORTE_ERR_BAD_PARAM;
    }

    /* assemble the trigger object */
    trigs = &trig;
    trig.name = trig_name;
    trig.action = action;
    trig.cnt = 1;
    values = &value;
    trig.values = &values;
    trig.cbfunc = cbfunc;
    trig.user_tag = user_tag;

    value.addr_mode = addr_mode;
    value.segment = segment;
    value.cnt = n;
    value.keyvals = (orte_gpr_keyval_t**)malloc(n * sizeof(orte_gpr_keyval_t*));
    if (NULL == value.keyvals) {
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        return ORTE_ERR_OUT_OF_RESOURCE;
    }

    for (i=0; i < n; i++) {
        value.keyvals[i] = OBJ_NEW(orte_gpr_keyval_t);
        if (NULL == value.keyvals[i]) {
            ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
            /* The keyvals built so far hold the caller's key pointers
             * (not copies), so the object destructor must not run on
             * them - release only the memory we allocated, exactly as
             * the normal cleanup path below does.
             */
            for (j=0; j < i; j++) free(value.keyvals[j]);
            free(value.keyvals);
            return ORTE_ERR_OUT_OF_RESOURCE;
        }
        value.keyvals[i]->key = keys[i];
        value.keyvals[i]->type = ORTE_SIZE;
        /* NOTE(review): the type is declared as ORTE_SIZE but the level is
         * stored through the intval member - confirm this is the member
         * the registry reads for ORTE_SIZE values.
         */
        value.keyvals[i]->value.intval = levels[i];
    }

    value.tokens = tokens;
    /* must count the number of tokens */
    if (NULL == tokens) {
        value.num_tokens = 0;
    } else {
        for (i=0; NULL != tokens[i]; i++) {
            (value.num_tokens)++;
        }
    }

    /* send the trigger definition - no subscriptions accompany it */
    if (ORTE_SUCCESS != (rc = orte_gpr.subscribe(0, NULL, 1, &trigs))) {
        ORTE_ERROR_LOG(rc);
    }

    /* clean up memory - very carefully!
     * We can't use the object destructors because we didn't
     * copy input data fields into the objects. Thus, only
     * release the data that we explicitly allocated
     */
    for (i=0; i < n; i++) free(value.keyvals[i]);
    free(value.keyvals);

    /* return the trigger id */
    *id = trig.id;

    return rc;
}

Просмотреть файл

@ -54,6 +54,10 @@ int orte_gpr_base_xfer_payload(orte_gpr_value_union_t *dest,
dest->pid = src->pid;
break;
case ORTE_INT:
dest->intval = src->intval;
break;
case ORTE_UINT8:
dest->ui8 = src->ui8;
break;

Просмотреть файл

@ -60,22 +60,92 @@ int orte_gpr_base_pack_dump_segments(orte_buffer_t *cmd, char *segment)
return ORTE_SUCCESS;
}
int orte_gpr_base_pack_dump_triggers(orte_buffer_t *cmd)
int orte_gpr_base_pack_dump_triggers(orte_buffer_t *cmd, orte_gpr_trigger_id_t start)
{
orte_gpr_cmd_flag_t command;
int rc;
command = ORTE_GPR_DUMP_TRIGGERS_CMD;
return orte_dps.pack(cmd, &command, 1, ORTE_GPR_CMD);
if (ORTE_SUCCESS != (rc = orte_dps.pack(cmd, &command, 1, ORTE_GPR_CMD))) {
ORTE_ERROR_LOG(rc);
return rc;
}
if (ORTE_SUCCESS != (rc = orte_dps.pack(cmd, &start, 1, ORTE_GPR_TRIGGER_ID))) {
ORTE_ERROR_LOG(rc);
return rc;
}
return ORTE_SUCCESS;
}
int orte_gpr_base_pack_dump_subscriptions(orte_buffer_t *cmd)
int orte_gpr_base_pack_dump_subscriptions(orte_buffer_t *cmd, orte_gpr_subscription_id_t start)
{
orte_gpr_cmd_flag_t command;
int rc;
command = ORTE_GPR_DUMP_SUBSCRIPTIONS_CMD;
return orte_dps.pack(cmd, &command, 1, ORTE_GPR_CMD);
if (ORTE_SUCCESS != (rc = orte_dps.pack(cmd, &command, 1, ORTE_GPR_CMD))) {
ORTE_ERROR_LOG(rc);
return rc;
}
if (ORTE_SUCCESS != (rc = orte_dps.pack(cmd, &start, 1, ORTE_GPR_SUBSCRIPTION_ID))) {
ORTE_ERROR_LOG(rc);
return rc;
}
return ORTE_SUCCESS;
}
/*
 * Pack a "dump a trigger" request into a command buffer.
 *
 * Three items are appended to cmd, in this order: the
 * ORTE_GPR_DUMP_A_TRIGGER_CMD command flag, the trigger name (as an
 * ORTE_STRING), and the trigger id (as an ORTE_GPR_TRIGGER_ID).
 *
 * @param cmd   Buffer that receives the packed request.
 * @param name  Trigger name to pack.
 * @param id    Trigger id to pack.
 *
 * @retval ORTE_SUCCESS All three items were packed.
 * @retval other        The error code returned by the failing
 *                      orte_dps.pack call; it is logged before returning.
 */
int orte_gpr_base_pack_dump_a_trigger(orte_buffer_t *cmd, char *name, orte_gpr_trigger_id_t id)
{
    orte_gpr_cmd_flag_t flag = ORTE_GPR_DUMP_A_TRIGGER_CMD;
    int status;

    /* command flag goes first */
    status = orte_dps.pack(cmd, &flag, 1, ORTE_GPR_CMD);
    if (ORTE_SUCCESS != status) {
        ORTE_ERROR_LOG(status);
        return status;
    }

    /* then the trigger's name */
    status = orte_dps.pack(cmd, &name, 1, ORTE_STRING);
    if (ORTE_SUCCESS != status) {
        ORTE_ERROR_LOG(status);
        return status;
    }

    /* and finally its id */
    status = orte_dps.pack(cmd, &id, 1, ORTE_GPR_TRIGGER_ID);
    if (ORTE_SUCCESS != status) {
        ORTE_ERROR_LOG(status);
        return status;
    }

    return ORTE_SUCCESS;
}
/*
 * Pack a "dump a subscription" request into a command buffer.
 *
 * Three items are appended to cmd, in this order: the
 * ORTE_GPR_DUMP_A_SUBSCRIPTION_CMD command flag, the subscription name
 * (as an ORTE_STRING), and the subscription id (as an
 * ORTE_GPR_SUBSCRIPTION_ID).
 *
 * @param cmd   Buffer that receives the packed request.
 * @param name  Subscription name to pack.
 * @param id    Subscription id to pack.
 *
 * @retval ORTE_SUCCESS All three items were packed.
 * @retval other        The error code returned by the failing
 *                      orte_dps.pack call; it is logged before returning.
 */
int orte_gpr_base_pack_dump_a_subscription(orte_buffer_t *cmd, char *name,
                                           orte_gpr_subscription_id_t id)
{
    orte_gpr_cmd_flag_t flag = ORTE_GPR_DUMP_A_SUBSCRIPTION_CMD;
    int status;

    /* command flag goes first */
    status = orte_dps.pack(cmd, &flag, 1, ORTE_GPR_CMD);
    if (ORTE_SUCCESS != status) {
        ORTE_ERROR_LOG(status);
        return status;
    }

    /* then the subscription's name */
    status = orte_dps.pack(cmd, &name, 1, ORTE_STRING);
    if (ORTE_SUCCESS != status) {
        ORTE_ERROR_LOG(status);
        return status;
    }

    /* and finally its id */
    status = orte_dps.pack(cmd, &id, 1, ORTE_GPR_SUBSCRIPTION_ID);
    if (ORTE_SUCCESS != status) {
        ORTE_ERROR_LOG(status);
        return status;
    }

    return ORTE_SUCCESS;
}
int orte_gpr_base_pack_dump_callbacks(orte_buffer_t *cmd)

Просмотреть файл

@ -33,7 +33,7 @@
#include "mca/gpr/base/base.h"
int orte_gpr_base_pack_subscribe(orte_buffer_t *cmd,
size_t num_subs,
size_t num_subs,
orte_gpr_subscription_t **subscriptions,
size_t num_trigs,
orte_gpr_trigger_t **trigs)
@ -52,14 +52,14 @@ int orte_gpr_base_pack_subscribe(orte_buffer_t *cmd,
if (ORTE_SUCCESS != (rc = orte_dps.pack(cmd, &command, 1, ORTE_GPR_CMD))) {
ORTE_ERROR_LOG(rc);
return rc;
return rc;
}
/* see if there are subscriptions - if so, pack them */
if (NULL != subscriptions) {
if (ORTE_SUCCESS != (rc = orte_dps.pack(cmd, subscriptions, num_subs, ORTE_GPR_SUBSCRIPTION))) {
ORTE_ERROR_LOG(rc);
return rc;
return rc;
}
} else {
if (ORTE_SUCCESS != (rc = orte_dps.pack(cmd, &zero, 1, ORTE_SIZE))) {
@ -89,7 +89,7 @@ int orte_gpr_base_pack_subscribe(orte_buffer_t *cmd,
int orte_gpr_base_pack_unsubscribe(orte_buffer_t *cmd,
orte_gpr_subscription_id_t id)
orte_gpr_subscription_id_t id)
{
orte_gpr_cmd_flag_t command;
int rc;
@ -97,11 +97,11 @@ int orte_gpr_base_pack_unsubscribe(orte_buffer_t *cmd,
command = ORTE_GPR_UNSUBSCRIBE_CMD;
if (ORTE_SUCCESS != (rc = orte_dps.pack(cmd, &command, 1, ORTE_GPR_CMD))) {
return rc;
return rc;
}
if (ORTE_SUCCESS != (rc = orte_dps.pack(cmd, &id, 1, ORTE_GPR_SUBSCRIPTION_ID))) {
return rc;
return rc;
}
return ORTE_SUCCESS;

Просмотреть файл

@ -52,10 +52,17 @@ int orte_gpr_base_dump_notify_msg(orte_buffer_t *buffer,
return ORTE_SUCCESS;
}
if (NULL == msg->name) {
asprintf(&tmp_out, "\tTrigger name: NULL");
if (ORTE_GPR_TRIGGER_MSG == msg->msg_type) {
asprintf(&tmp_out, "TRIGGER message");
} else if (ORTE_GPR_SUBSCRIPTION_MSG == msg->msg_type) {
asprintf(&tmp_out, "SUBSCRIPTION message");
}
orte_gpr_base_dump_load_string(buffer, &tmp_out);
if (NULL == msg->target) {
asprintf(&tmp_out, "\tTrigger target: NULL");
} else {
asprintf(&tmp_out, "\tTrigger name: %s", msg->name);
asprintf(&tmp_out, "\tTrigger target: %s", msg->target);
}
orte_gpr_base_dump_load_string(buffer, &tmp_out);
@ -108,9 +115,9 @@ static void orte_gpr_base_dump_data(orte_buffer_t *buffer,
orte_gpr_value_t **values;
size_t i, j;
if (NULL != data->name) {
asprintf(&tmp_out, "%lu values going to subscription name %s",
(unsigned long) data->cnt, data->name);
if (NULL != data->target) {
asprintf(&tmp_out, "%lu values going to subscription target %s",
(unsigned long) data->cnt, data->target);
} else {
asprintf(&tmp_out, "%lu values going to subscription num %lu",
(unsigned long) data->cnt, (unsigned long) data->id);
@ -259,7 +266,7 @@ void orte_gpr_base_dump_keyval_value(orte_buffer_t *buffer, orte_gpr_keyval_t *i
break;
case ORTE_PID:
asprintf(&tmp_out, "\t\t\tData type: ORTE_PID:\tValue: " ORTE_PID_T_PRINTF, iptr->value.pid);
asprintf(&tmp_out, "\t\t\tData type: ORTE_PID:\tValue: %lu", (unsigned long)iptr->value.pid);
orte_gpr_base_dump_load_string(buffer, &tmp_out);
break;

Просмотреть файл

@ -350,7 +350,7 @@ typedef int (*orte_gpr_base_module_get_nb_fn_t)(orte_gpr_addr_mode_t addr_mode,
* @endcode
*/
typedef int (*orte_gpr_base_module_delete_entries_fn_t)(orte_gpr_addr_mode_t addr_mode,
char *segment, char **tokens, char **keys);
char *segment, char **tokens, char **keys);
/*
* Delete an object from the registry (NON-BLOCKING)
@ -501,6 +501,18 @@ typedef int (*orte_gpr_base_module_define_trigger_fn_t)(orte_gpr_trigger_id_t *i
orte_gpr_trigger_cb_fn_t cbfunc,
void *user_tag);
/*
 * Signature of the module's define_trigger_level entry point.
 * Returns an int status code.
 *
 * NOTE(review): the following is inferred from the parameter names and
 * should be confirmed against the implementation:
 *  - id presumably receives the id assigned to the new trigger;
 *  - keys and levels are presumably parallel arrays holding n entries each;
 *  - tokens is presumably a NULL-terminated list (may be NULL).
 */
typedef int (*orte_gpr_base_module_define_trigger_level_fn_t)(orte_gpr_trigger_id_t *id,
char *trig_name,
orte_gpr_trigger_action_t action,
orte_gpr_addr_mode_t addr_mode,
char *segment,
char **tokens,
size_t n,
char **keys,
size_t *levels,
orte_gpr_trigger_cb_fn_t cbfunc,
void *user_tag);
/*
* Cancel a subscription.
* Once a subscription has been entered on the registry, a caller may choose to permanently
@ -556,9 +568,23 @@ typedef int (*orte_gpr_base_module_dump_all_fn_t)(int output_id);
typedef int (*orte_gpr_base_module_dump_segment_fn_t)(char *segment, int output_id);
typedef int (*orte_gpr_base_module_dump_triggers_fn_t)(int output_id);
typedef int (*orte_gpr_base_module_dump_triggers_fn_t)(
orte_gpr_trigger_id_t tail,
int output_id);
typedef int (*orte_gpr_base_module_dump_subscriptions_fn_t)(int output_id);
typedef int (*orte_gpr_base_module_dump_subscriptions_fn_t)(
orte_gpr_subscription_id_t tail,
int output_id);
/*
 * Signature of the module's dump_a_trigger entry point: takes a trigger
 * name, a trigger id and an output id, and returns an int status code.
 * NOTE(review): presumably dumps the single trigger selected by name
 * and/or id to the output stream output_id - confirm which selector
 * takes precedence.
 */
typedef int (*orte_gpr_base_module_dump_a_trigger_fn_t)(
char *name,
orte_gpr_trigger_id_t id,
int output_id);
/*
 * Signature of the module's dump_a_subscription entry point: takes a
 * subscription name, a subscription id and an output id, and returns an
 * int status code.
 * NOTE(review): presumably dumps the single subscription selected by name
 * and/or id to the output stream output_id - confirm which selector
 * takes precedence.
 */
typedef int (*orte_gpr_base_module_dump_a_subscription_fn_t)(
char *name,
orte_gpr_subscription_id_t id,
int output_id);
typedef int (*orte_gpr_base_module_dump_local_triggers_fn_t)(int output_id);
@ -602,6 +628,19 @@ typedef int (*orte_gpr_base_module_decrement_value_fn_t)(orte_gpr_value_t *value
typedef int (*orte_gpr_base_module_xfer_payload_fn_t)(orte_gpr_value_union_t *dest,
orte_gpr_value_union_t *src, orte_data_type_t type);
/* Deliver a notify message
* To support the broadcast of stage gate messages that supply all subscribed
* data in a single message, we have to provide an API that allows the xcast
* to "inject" the message back into the registry's local delivery system.
*
* @param msg A pointer to the orte_gpr_notify_message_t object to be delivered.
* Note that the calling program is responsible for releasing this object.
*
* @retval ORTE_SUCCESS The message was delivered.
* @retval ORTE_ERROR_VALUE An appropriate error code if delivery failed
* (the function returns an int status, not void).
*/
typedef int (*orte_gpr_base_module_deliver_notify_msg_t)(orte_gpr_notify_message_t *msg);
/*
* Ver 1.0.0
*/
@ -625,6 +664,7 @@ struct orte_gpr_base_module_1_0_0_t {
/* GENERAL OPERATIONS */
orte_gpr_base_module_preallocate_segment_fn_t preallocate_segment;
orte_gpr_base_module_xfer_payload_fn_t xfer_payload;
orte_gpr_base_module_deliver_notify_msg_t deliver_notify_msg;
/* ARITHMETIC OPERATIONS */
orte_gpr_base_module_increment_value_fn_t increment_value;
orte_gpr_base_module_decrement_value_fn_t decrement_value;
@ -633,6 +673,7 @@ struct orte_gpr_base_module_1_0_0_t {
orte_gpr_base_module_subscribe_1_fn_t subscribe_1;
orte_gpr_base_module_subscribe_N_fn_t subscribe_N;
orte_gpr_base_module_define_trigger_fn_t define_trigger;
orte_gpr_base_module_define_trigger_level_fn_t define_trigger_level;
orte_gpr_base_module_unsubscribe_fn_t unsubscribe;
orte_gpr_base_module_cancel_trigger_fn_t cancel_trigger;
/* COMPOUND COMMANDS */
@ -644,6 +685,8 @@ struct orte_gpr_base_module_1_0_0_t {
orte_gpr_base_module_dump_segment_fn_t dump_segment;
orte_gpr_base_module_dump_triggers_fn_t dump_triggers;
orte_gpr_base_module_dump_subscriptions_fn_t dump_subscriptions;
orte_gpr_base_module_dump_a_trigger_fn_t dump_a_trigger;
orte_gpr_base_module_dump_a_subscription_fn_t dump_a_subscription;
orte_gpr_base_module_dump_local_triggers_fn_t dump_local_triggers;
orte_gpr_base_module_dump_local_subscriptions_fn_t dump_local_subscriptions;
orte_gpr_base_module_dump_callbacks_fn_t dump_callbacks;
@ -662,9 +705,9 @@ typedef orte_gpr_base_module_1_0_0_t orte_gpr_base_module_t;
*/
typedef orte_gpr_base_module_t* (*orte_gpr_base_component_init_fn_t)(
bool *allow_multi_user_threads,
bool *have_hidden_threads,
int *priority);
bool *allow_multi_user_threads,
bool *have_hidden_threads,
int *priority);
typedef int (*orte_gpr_base_component_finalize_fn_t)(void);
@ -686,11 +729,11 @@ typedef mca_gpr_base_component_1_0_0_t mca_gpr_base_component_t;
/*
* Macro for use in modules that are of type gpr v1.0.0
*/
#define MCA_GPR_BASE_VERSION_1_0_0 \
/* gpr v1.0 is chained to MCA v1.0 */ \
MCA_BASE_VERSION_1_0_0, \
/* gpr v1.0 */ \
"gpr", 1, 0, 0
#define MCA_GPR_BASE_VERSION_1_0_0 \
/* gpr v1.0 is chained to MCA v1.0 */ \
MCA_BASE_VERSION_1_0_0, \
/* gpr v1.0 */ \
"gpr", 1, 0, 0
/*
* global module that holds function pointers

Просмотреть файл

@ -120,6 +120,7 @@ typedef union { /* shared storage for the value */
size_t size;
bool tf_flag;
pid_t pid;
int intval;
uint8_t ui8;
uint16_t ui16;
uint32_t ui32;
@ -188,7 +189,7 @@ OMPI_DECLSPEC OBJ_CLASS_DECLARATION(orte_gpr_value_t);
*/
typedef struct {
opal_object_t super; /**< Makes this an object */
char *name; /**< Name of the associated subscription, if provided */
char *target; /**< Name of the associated subscription, if provided */
orte_gpr_subscription_id_t id; /**< Number of the associated subscription */
bool remove; /**< Remove this subscription from recipient's tracker */
size_t cnt; /**< Number of value objects returned, one per container */
@ -199,10 +200,17 @@ OMPI_DECLSPEC OBJ_CLASS_DECLARATION(orte_gpr_notify_data_t);
/** Return message for notify requests
*/
typedef uint8_t orte_gpr_notify_msg_type_t;
#define ORTE_GPR_NOTIFY_MSG_TYPE_T ORTE_UINT8
#define ORTE_GPR_TRIGGER_MSG (orte_gpr_notify_msg_type_t)0x01
#define ORTE_GPR_SUBSCRIPTION_MSG (orte_gpr_notify_msg_type_t)0x02
typedef struct {
opal_object_t super; /**< Make this an object */
char *name; /**< Name of the associated trigger, if provided */
orte_gpr_trigger_id_t id; /**< trigger id, if message comes from trigger (ORTE_GPR_TRIGGER_ID_MAX otherwise) */
orte_gpr_notify_msg_type_t msg_type; /**< trigger or subscription msg */
char *target; /**< Name of the associated trigger, if provided */
orte_gpr_trigger_id_t id; /**< trigger id, if message comes from trigger
(ORTE_GPR_TRIGGER_ID_MAX otherwise) */
bool remove; /**< Remove this trigger from recipient's tracker */
size_t cnt; /**< number of data objects */
orte_pointer_array_t *data; /**< Contiguous array of pointers to data objects */
@ -221,8 +229,10 @@ typedef void (*orte_gpr_notify_cb_fn_t)(orte_gpr_notify_data_t *notify_data, voi
* notify_msg = message containing multiple blocks of data provided by trigger
*
* user_tag = whatever tag data the user provided when filing the subscription
*
* Since this only takes place locally, we CAN get a status code from the callback!
*/
typedef void (*orte_gpr_trigger_cb_fn_t)(orte_gpr_notify_message_t *msg, void *user_tag);
typedef int (*orte_gpr_trigger_cb_fn_t)(orte_gpr_notify_message_t *msg);
/** Structure for registering subscriptions
* A request to be notified when certain events occur, or when counters reach specified

Просмотреть файл

@ -73,8 +73,8 @@ orte_gpr_null_delete_segment(char *segment)
static int
orte_gpr_null_delete_segment_nb(char *segment,
orte_gpr_notify_cb_fn_t cbfunc,
void *user_tag)
orte_gpr_notify_cb_fn_t cbfunc,
void *user_tag)
{
return ORTE_SUCCESS;
}
@ -87,41 +87,41 @@ orte_gpr_null_put(size_t cnt, orte_gpr_value_t ** values)
static int
orte_gpr_null_put_nb(size_t cnt, orte_gpr_value_t ** values,
orte_gpr_notify_cb_fn_t cbfunc, void *user_tag)
orte_gpr_notify_cb_fn_t cbfunc, void *user_tag)
{
return ORTE_SUCCESS;
}
static int
orte_gpr_null_get(orte_gpr_addr_mode_t addr_mode,
char *segment, char **tokens, char **keys,
size_t * cnt, orte_gpr_value_t *** values)
char *segment, char **tokens, char **keys,
size_t * cnt, orte_gpr_value_t *** values)
{
return ORTE_ERR_NOT_IMPLEMENTED;
}
static int
orte_gpr_null_get_nb(orte_gpr_addr_mode_t addr_mode,
char *segment, char **tokens, char **keys,
orte_gpr_notify_cb_fn_t cbfunc, void *user_tag)
char *segment, char **tokens, char **keys,
orte_gpr_notify_cb_fn_t cbfunc, void *user_tag)
{
return ORTE_ERR_NOT_IMPLEMENTED;
}
static int
orte_gpr_null_delete_entries(orte_gpr_addr_mode_t addr_mode,
char *segment, char **tokens,
char **keys)
char *segment, char **tokens,
char **keys)
{
return ORTE_SUCCESS;
}
static int
orte_gpr_null_delete_entries_nb(orte_gpr_addr_mode_t addr_mode,
char *segment, char **tokens,
char **keys,
orte_gpr_notify_cb_fn_t cbfunc,
void *user_tag)
char *segment, char **tokens,
char **keys,
orte_gpr_notify_cb_fn_t cbfunc,
void *user_tag)
{
return ORTE_SUCCESS;
}
@ -134,17 +134,17 @@ orte_gpr_null_index(char *segment, size_t * cnt, char ***index)
static int
orte_gpr_null_index_nb(char *segment,
orte_gpr_notify_cb_fn_t cbfunc,
void *user_tag)
orte_gpr_notify_cb_fn_t cbfunc,
void *user_tag)
{
return ORTE_SUCCESS;
}
static int
orte_gpr_null_subscribe(size_t num_subs,
orte_gpr_subscription_t ** subscriptions,
size_t num_trigs,
orte_gpr_trigger_t ** triggers)
orte_gpr_subscription_t ** subscriptions,
size_t num_trigs,
orte_gpr_trigger_t ** triggers)
{
return ORTE_SUCCESS;
}
@ -174,13 +174,13 @@ orte_gpr_null_dump_segments(char *segment, int output_id)
}
static int
orte_gpr_null_dump_triggers(int output_id)
orte_gpr_null_dump_triggers(orte_gpr_trigger_id_t start, int output_id)
{
return ORTE_SUCCESS;
}
static int
orte_gpr_null_dump_subscriptions(int output_id)
orte_gpr_null_dump_subscriptions(orte_gpr_subscription_id_t start, int output_id)
{
return ORTE_SUCCESS;
}
@ -205,14 +205,14 @@ orte_gpr_null_dump_callbacks(int output_id)
static int
orte_gpr_null_dump_notify_msg(orte_gpr_notify_message_t * msg,
int output_id)
int output_id)
{
return ORTE_SUCCESS;
}
static int
orte_gpr_null_dump_notify_data(orte_gpr_notify_data_t * data,
int output_id)
int output_id)
{
return ORTE_SUCCESS;
}
@ -237,8 +237,8 @@ orte_gpr_null_decrement_value(orte_gpr_value_t * value)
static int
orte_gpr_null_xfer_payload(orte_gpr_value_union_t * dest,
orte_gpr_value_union_t * src,
orte_data_type_t type)
orte_gpr_value_union_t * src,
orte_data_type_t type)
{
return ORTE_SUCCESS;
}
@ -306,8 +306,41 @@ static int orte_gpr_null_define_trigger(orte_gpr_trigger_id_t *id,
return ORTE_SUCCESS;
}
/*
 * Null-component stand-in for define_trigger_level.
 *
 * Every argument is ignored and nothing is registered; in particular *id
 * is left untouched, so callers must not rely on it after this call.
 *
 * @retval ORTE_SUCCESS Always.
 */
static int
orte_gpr_null_define_trigger_level(orte_gpr_trigger_id_t *id,
                                   char *trig_name,
                                   orte_gpr_trigger_action_t action,
                                   orte_gpr_addr_mode_t addr_mode,
                                   char *segment,
                                   char **tokens,
                                   size_t n,
                                   char **keys,
                                   size_t *levels,
                                   orte_gpr_trigger_cb_fn_t cbfunc,
                                   void *user_tag)
{
    return ORTE_SUCCESS;
}
/*
 * Null-component stand-in for deliver_notify_msg.
 *
 * The message is neither inspected nor delivered to any callback, and it
 * is not released here.
 *
 * @retval ORTE_SUCCESS Always.
 */
static int
orte_gpr_null_deliver_notify_msg(orte_gpr_notify_message_t *msg)
{
    return ORTE_SUCCESS;
}
/*
 * Null-component stand-in for dump_a_trigger.
 *
 * All arguments are ignored and nothing is written to output_id.
 *
 * @retval ORTE_SUCCESS Always.
 */
static int
orte_gpr_null_dump_a_trigger(char *name,
                             orte_gpr_trigger_id_t id,
                             int output_id)
{
    return ORTE_SUCCESS;
}
/*
 * Null-component stand-in for dump_a_subscription.
 *
 * All arguments are ignored and nothing is written to output_id.
 *
 * @retval ORTE_SUCCESS Always.
 */
static int
orte_gpr_null_dump_a_subscription(char *name,
                                  orte_gpr_subscription_id_t id,
                                  int output_id)
{
    return ORTE_SUCCESS;
}
/*
* setup the function pointers for the module
*/
@ -331,6 +364,7 @@ orte_gpr_base_module_t orte_gpr_null_module = {
/* GENERAL OPERATIONS */
orte_gpr_null_preallocate_segment,
orte_gpr_null_xfer_payload,
orte_gpr_null_deliver_notify_msg,
/* ARITHMETIC OPERATIONS */
orte_gpr_null_increment_value,
orte_gpr_null_decrement_value,
@ -339,6 +373,7 @@ orte_gpr_base_module_t orte_gpr_null_module = {
orte_gpr_null_subscribe_1,
orte_gpr_null_subscribe_N,
orte_gpr_null_define_trigger,
orte_gpr_null_define_trigger_level,
orte_gpr_null_unsubscribe,
orte_gpr_null_cancel_trigger,
/* COMPOUND COMMANDS */
@ -350,6 +385,8 @@ orte_gpr_base_module_t orte_gpr_null_module = {
orte_gpr_null_dump_segments,
orte_gpr_null_dump_triggers,
orte_gpr_null_dump_subscriptions,
orte_gpr_null_dump_a_trigger,
orte_gpr_null_dump_a_subscription,
orte_gpr_null_dump_local_triggers,
orte_gpr_null_dump_local_subscriptions,
orte_gpr_null_dump_callbacks,

Просмотреть файл

@ -19,16 +19,17 @@
include $(top_ompi_srcdir)/config/Makefile.options
sources = \
gpr_proxy_arithmetic_ops.c \
gpr_proxy_cleanup.c \
gpr_proxy_component.c \
gpr_proxy_compound_cmd.c \
gpr_proxy_del_index.c \
gpr_proxy_cleanup.c \
gpr_proxy_deliver_notify_msg.c \
gpr_proxy_dump.c \
gpr_proxy_dump_local_trigs_subs.c \
gpr_proxy_general_operations.c \
gpr_proxy_internals.c \
gpr_proxy_put_get.c \
gpr_proxy_general_operations.c \
gpr_proxy_arithmetic_ops.c \
gpr_proxy_subscribe.c \
gpr_proxy.h

Просмотреть файл

@ -55,6 +55,7 @@ int orte_gpr_proxy_finalize(void);
typedef struct {
opal_object_t super; /**< Allows this to be an object */
orte_gpr_subscription_id_t id; /**< id of this subscription */
size_t index; /**< location of this subscription in array */
char *name;
orte_gpr_notify_cb_fn_t callback; /**< Function to be called for notification */
void *user_tag; /**< User-provided tag for callback function */
@ -66,6 +67,7 @@ OBJ_CLASS_DECLARATION(orte_gpr_proxy_subscriber_t);
typedef struct {
opal_object_t super; /**< Allows this to be an object */
orte_gpr_trigger_id_t id; /**< id of this trigger */
size_t index; /**< location of this trigger in array */
char *name;
orte_gpr_trigger_cb_fn_t callback; /**< Function to be called for notification */
void *user_tag; /**< User-provided tag for callback function */
@ -119,7 +121,7 @@ int orte_gpr_proxy_delete_segment_nb(char *segment,
orte_gpr_notify_cb_fn_t cbfunc, void *user_tag);
int orte_gpr_proxy_delete_entries(orte_gpr_addr_mode_t mode,
char *segment, char **tokens, char **keys);
char *segment, char **tokens, char **keys);
int orte_gpr_proxy_delete_entries_nb(
orte_gpr_addr_mode_t addr_mode,
@ -177,9 +179,17 @@ int orte_gpr_proxy_dump_all(int output_id);
int orte_gpr_proxy_dump_segments(char *segment, int output_id);
int orte_gpr_proxy_dump_triggers(int output_id);
int orte_gpr_proxy_dump_triggers(orte_gpr_trigger_id_t start, int output_id);
int orte_gpr_proxy_dump_subscriptions(int output_id);
int orte_gpr_proxy_dump_subscriptions(orte_gpr_subscription_id_t start, int output_id);
int orte_gpr_proxy_dump_a_trigger(char *name,
orte_gpr_trigger_id_t id,
int output_id);
int orte_gpr_proxy_dump_a_subscription(char *name,
orte_gpr_subscription_id_t id,
int output_id);
int orte_gpr_proxy_dump_local_triggers(int output_id);
@ -198,12 +208,14 @@ int orte_gpr_proxy_dump_value(orte_gpr_value_t *value, int output_id);
*/
int orte_gpr_proxy_preallocate_segment(char *name, size_t num_slots);
int orte_gpr_proxy_deliver_notify_msg(orte_gpr_notify_message_t *msg);
/*
* Functions that interface to the replica
*/
void orte_gpr_proxy_notify_recv(int status, orte_process_name_t* sender,
orte_buffer_t *buffer, orte_rml_tag_t tag,
void* cbdata);
orte_buffer_t *buffer, orte_rml_tag_t tag,
void* cbdata);
/*
@ -214,14 +226,14 @@ int
orte_gpr_proxy_enter_subscription(size_t cnt, orte_gpr_subscription_t **subscriptions);
int
orte_gpr_proxy_remove_subscription(orte_gpr_subscription_id_t id);
orte_gpr_proxy_remove_subscription(orte_gpr_proxy_subscriber_t *sub);
int
orte_gpr_proxy_enter_trigger(size_t cnt, orte_gpr_trigger_t **triggers);
int
orte_gpr_proxy_remove_trigger(orte_gpr_trigger_id_t id);
orte_gpr_proxy_remove_trigger(orte_gpr_proxy_trigger_t *trig);
#if defined(c_plusplus) || defined(__cplusplus)
}

Просмотреть файл

@ -45,17 +45,17 @@
*/
mca_gpr_base_component_t mca_gpr_proxy_component = {
{
MCA_GPR_BASE_VERSION_1_0_0,
MCA_GPR_BASE_VERSION_1_0_0,
"proxy", /* MCA module name */
ORTE_MAJOR_VERSION, /* MCA module major version */
ORTE_MINOR_VERSION, /* MCA module minor version */
ORTE_RELEASE_VERSION, /* MCA module release version */
orte_gpr_proxy_open, /* module open */
orte_gpr_proxy_close /* module close */
"proxy", /* MCA module name */
ORTE_MAJOR_VERSION, /* MCA module major version */
ORTE_MINOR_VERSION, /* MCA module minor version */
ORTE_RELEASE_VERSION, /* MCA module release version */
orte_gpr_proxy_open, /* module open */
orte_gpr_proxy_close /* module close */
},
{
false /* checkpoint / restart */
false /* checkpoint / restart */
},
orte_gpr_proxy_component_init, /* module init */
orte_gpr_proxy_finalize /* module shutdown */
@ -84,6 +84,7 @@ static orte_gpr_base_module_t orte_gpr_proxy = {
/* GENERAL OPERATIONS */
orte_gpr_proxy_preallocate_segment,
orte_gpr_base_xfer_payload,
orte_gpr_proxy_deliver_notify_msg,
/* ARITHMETIC OPERATIONS */
orte_gpr_proxy_increment_value,
orte_gpr_proxy_decrement_value,
@ -92,6 +93,7 @@ static orte_gpr_base_module_t orte_gpr_proxy = {
orte_gpr_base_subscribe_1,
orte_gpr_base_subscribe_N,
orte_gpr_base_define_trigger,
orte_gpr_base_define_trigger_level,
orte_gpr_proxy_unsubscribe,
orte_gpr_proxy_cancel_trigger,
/* COMPOUND COMMANDS */
@ -103,6 +105,8 @@ static orte_gpr_base_module_t orte_gpr_proxy = {
orte_gpr_proxy_dump_segments,
orte_gpr_proxy_dump_triggers,
orte_gpr_proxy_dump_subscriptions,
orte_gpr_proxy_dump_a_trigger,
orte_gpr_proxy_dump_a_subscription,
orte_gpr_proxy_dump_local_triggers,
orte_gpr_proxy_dump_local_subscriptions,
orte_gpr_proxy_dump_callbacks,
@ -207,17 +211,17 @@ orte_gpr_proxy_component_init(bool *allow_multi_user_threads, bool *have_hidden_
int ret;
if (orte_gpr_proxy_globals.debug) {
opal_output(0, "gpr_proxy_init called");
opal_output(0, "gpr_proxy_init called");
}
/* If we are NOT to host a replica, then we want to be selected, so do all
the setup and return the module */
if (NULL != orte_process_info.gpr_replica_uri) {
if (orte_gpr_proxy_globals.debug) {
opal_output(0, "[%lu,%lu,%lu] gpr_proxy_init: proxy selected",
if (orte_gpr_proxy_globals.debug) {
opal_output(0, "[%lu,%lu,%lu] gpr_proxy_init: proxy selected",
ORTE_NAME_ARGS(orte_process_info.my_name));
}
}
/* setup the replica location */
if(ORTE_SUCCESS != (ret = orte_rml.parse_uris(orte_process_info.gpr_replica_uri, &name, NULL))) {
@ -229,29 +233,29 @@ orte_gpr_proxy_component_init(bool *allow_multi_user_threads, bool *have_hidden_
return NULL;
}
/* Return a module (choose an arbitrary, positive priority --
it's only relevant compared to other ns components). If
we're not the seed, then we don't want to be selected, so
return NULL. */
/* Return a module (choose an arbitrary, positive priority --
it's only relevant compared to other ns components). If
we're not the seed, then we don't want to be selected, so
return NULL. */
*priority = 10;
*priority = 10;
/* We allow multi user threads but don't have any hidden threads */
/* We allow multi user threads but don't have any hidden threads */
*allow_multi_user_threads = true;
*have_hidden_threads = false;
*allow_multi_user_threads = true;
*have_hidden_threads = false;
/* setup thread locks and condition variable */
OBJ_CONSTRUCT(&orte_gpr_proxy_globals.mutex, opal_mutex_t);
OBJ_CONSTRUCT(&orte_gpr_proxy_globals.wait_for_compound_mutex, opal_mutex_t);
OBJ_CONSTRUCT(&orte_gpr_proxy_globals.compound_cmd_condition, opal_condition_t);
/* setup thread locks and condition variable */
OBJ_CONSTRUCT(&orte_gpr_proxy_globals.mutex, opal_mutex_t);
OBJ_CONSTRUCT(&orte_gpr_proxy_globals.wait_for_compound_mutex, opal_mutex_t);
OBJ_CONSTRUCT(&orte_gpr_proxy_globals.compound_cmd_condition, opal_condition_t);
/* initialize the registry compound mode */
orte_gpr_proxy_globals.compound_cmd_mode = false;
orte_gpr_proxy_globals.compound_cmd_waiting = 0;
orte_gpr_proxy_globals.compound_cmd = NULL;
/* initialize the registry compound mode */
orte_gpr_proxy_globals.compound_cmd_mode = false;
orte_gpr_proxy_globals.compound_cmd_waiting = 0;
orte_gpr_proxy_globals.compound_cmd = NULL;
/* initialize the subscription tracker */
/* initialize the subscription tracker */
if (ORTE_SUCCESS != (ret = orte_pointer_array_init(&(orte_gpr_proxy_globals.subscriptions),
orte_gpr_array_block_size,
orte_gpr_array_max_size,
@ -298,12 +302,12 @@ int orte_gpr_proxy_finalize(void)
{
if (orte_gpr_proxy_globals.debug) {
opal_output(0, "[%lu,%lu,%lu] gpr_proxy_finalize called",
opal_output(0, "[%lu,%lu,%lu] gpr_proxy_finalize called",
ORTE_NAME_ARGS(orte_process_info.my_name));
}
if (initialized) {
initialized = false;
initialized = false;
}
/* All done */
@ -316,31 +320,23 @@ int orte_gpr_proxy_finalize(void)
*/
void orte_gpr_proxy_notify_recv(int status, orte_process_name_t* sender,
orte_buffer_t *buffer, orte_rml_tag_t tag,
void* cbdata)
orte_buffer_t *buffer, orte_rml_tag_t tag,
void* cbdata)
{
orte_gpr_cmd_flag_t command;
orte_gpr_notify_message_t *msg;
orte_gpr_notify_data_t **data;
orte_gpr_proxy_subscriber_t *sub;
orte_gpr_proxy_trigger_t *trig;
size_t i, n;
size_t n;
int rc;
if (orte_gpr_proxy_globals.debug) {
opal_output(0, "[%lu,%lu,%lu] gpr_proxy_notify_recv: received trigger message",
ORTE_NAME_ARGS(orte_process_info.my_name));
}
n = 1;
if (ORTE_SUCCESS != (rc = orte_dps.unpack(buffer, &command, &n, ORTE_GPR_CMD))) {
ORTE_ERROR_LOG(rc);
goto RETURN_ERROR;
}
if (ORTE_GPR_NOTIFY_CMD != command) {
if (ORTE_GPR_NOTIFY_CMD != command) {
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
goto RETURN_ERROR;
goto RETURN_ERROR;
}
msg = OBJ_NEW(orte_gpr_notify_message_t);
@ -356,61 +352,16 @@ void orte_gpr_proxy_notify_recv(int status, orte_process_name_t* sender,
goto RETURN_ERROR;
}
/* if the message trigger id is valid (i.e., it is set to
* something other than ORTE_GPR_TRIGGER_ID_MAX), then this
* is an aggregated message intended for a single receiver.
* In that case, look up the associated TRIGGER id and pass
* the entire message to that receiver.
*/
if (ORTE_GPR_TRIGGER_ID_MAX > msg->id) {
trig = (orte_gpr_proxy_globals.triggers)->addr[msg->id];
if (NULL == trig) {
ORTE_ERROR_LOG(ORTE_ERR_GPR_DATA_CORRUPT);
} else {
trig->callback(msg, sub->user_tag);
}
if (msg->remove) {
if (ORTE_SUCCESS != (rc = orte_gpr_proxy_remove_trigger(msg->id))) {
ORTE_ERROR_LOG(rc);
}
}
OBJ_RELEASE(msg);
goto RETURN_ERROR;
}
/* if the message trigger id was NOT valid, then we split the
* message into its component datagrams and send each of them
* separately to their respective subscriber.
*/
if (msg->cnt > 0) {
data = (orte_gpr_notify_data_t**)(msg->data)->addr;
for (i=0; i < msg->cnt; i++) {
/* for speed purposes, we take advantage here of
* our knowledge on how this pointer array was
* constructed - we know that it is contiguous
* and that there are no NULL gaps in it.
*/
/* process request */
if (data[i]->id > orte_gpr_proxy_globals.num_subs) {
ORTE_ERROR_LOG(ORTE_ERR_GPR_DATA_CORRUPT);
continue;
}
sub = (orte_gpr_proxy_globals.subscriptions)->addr[data[i]->id];
if (NULL == sub) {
ORTE_ERROR_LOG(ORTE_ERR_GPR_DATA_CORRUPT);
} else {
sub->callback(data[i], sub->user_tag);
}
if (data[i]->remove) {
if (ORTE_SUCCESS != (rc = orte_gpr_proxy_remove_subscription(data[i]->id))) {
ORTE_ERROR_LOG(rc);
}
}
}
/* release data */
/* process the message */
if (ORTE_SUCCESS != (rc = orte_gpr_proxy_deliver_notify_msg(msg))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(msg);
}
goto RETURN_ERROR;
}
/* release data */
OBJ_RELEASE(msg);
RETURN_ERROR:

Просмотреть файл

@ -44,80 +44,107 @@ int orte_gpr_proxy_deliver_notify_msg(orte_gpr_notify_message_t *msg)
{
orte_gpr_notify_data_t **data;
orte_gpr_proxy_subscriber_t **subs, *sub;
orte_gpr_proxy_trigger_t *trig;
size_t i, j, k;
orte_gpr_proxy_trigger_t *trig, **trigs;
size_t i, j, k, n;
bool processed;
int rc;
/* if the message trigger id is valid (i.e., it is set to
* something other than ORTE_GPR_TRIGGER_ID_MAX), then this
* is an aggregated message intended for a single receiver.
* In that case, look up the associated TRIGGER id and pass
* the entire message to that receiver.
/* we first have to check if the message is a trigger message - if so,
* then the message is intended to be
* sent as a single block to that trigger's callback function.
*/
if (ORTE_GPR_TRIGGER_ID_MAX > msg->id) {
if (ORTE_GPR_TRIGGER_MSG == msg->msg_type) {
trig = (orte_gpr_proxy_globals.triggers)->addr[msg->id];
if (NULL == trig) {
ORTE_ERROR_LOG(ORTE_ERR_GPR_DATA_CORRUPT);
opal_output(0, "Trigger id: %lu", (unsigned long)msg->id);
orte_gpr.dump_local_triggers(0);
return ORTE_ERR_GPR_DATA_CORRUPT;
} else {
trig->callback(msg);
}
if (msg->remove) {
if (ORTE_SUCCESS != (rc = orte_gpr_proxy_remove_trigger(msg->id))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* remove the specified trigger from the local tracker */
trigs = (orte_gpr_proxy_trigger_t**)(orte_gpr_proxy_globals.triggers)->addr;
for (i=0, j=0; j < orte_gpr_proxy_globals.num_trigs &&
i < (orte_gpr_proxy_globals.triggers)->size; i++) {
if (NULL != trigs[i]){
j++;
if (msg->id == trigs[i]->id) {
if (ORTE_SUCCESS != (rc = orte_gpr_proxy_remove_trigger(trigs[i]))) {
ORTE_ERROR_LOG(rc);
}
OPAL_THREAD_UNLOCK(&orte_gpr_proxy_globals.mutex);
return rc;
}
}
}
/* must not have been found - report error */
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
return ORTE_ERR_NOT_FOUND;
}
return ORTE_SUCCESS;
}
/* if the message trigger id was NOT valid, then we split the
* message into its component datagrams and send each of them
* separately to their respective subscriber.
/* get here if this wasn't a trigger message. Only other allowed message type
* is a subscription message - if that isn't the case, then we have corrupt
* data, so flag it and return
*/
if (ORTE_GPR_SUBSCRIPTION_MSG != msg->msg_type) {
ORTE_ERROR_LOG(ORTE_ERR_GPR_DATA_CORRUPT);
return ORTE_ERR_GPR_DATA_CORRUPT;
}
/* get here if we have a subscription message - i.e., the message should
* be broken into its component parts and delivered separately
* to the indicated subscribers
*/
data = (orte_gpr_notify_data_t**)(msg->data)->addr;
orte_gpr.dump_local_subscriptions(0);
for (i=0; i < msg->cnt; i++) {
opal_output(0, "[%lu,%lu,%lu] Sub data id %lu", ORTE_NAME_ARGS(orte_process_info.my_name), (unsigned long)data[i]->id);
opal_output(0, "\tSub name %s", data[i]->target);
/* for each datagram in the message, we need to lookup
* the associated subscription (could be specified by name or id) to find the correct
* callback function. Name specifications are given precedence over id.
*/
subs = (orte_gpr_proxy_subscriber_t**)
(orte_gpr_proxy_globals.subscriptions)->addr;
processed = false;
for (j=0, k=0; !processed &&
k < orte_gpr_proxy_globals.num_subs &&
j < (orte_gpr_proxy_globals.subscriptions)->size; j++) {
if (NULL != subs[j]) {
k++;
if ((NULL != subs[j]->name &&
NULL != data[i]->target &&
0 == strcmp(data[i]->target, subs[j]->name)) ||
(data[i]->id == subs[j]->id)) {
sub = subs[j];
processed = true;
for (i=0, n=0; n < msg->cnt &&
i < (msg->data)->size; i++) {
if (NULL != data[i]) {
n++;
if (ORTE_GPR_SUBSCRIPTION_ID_MAX != data[i]->id || NULL != data[i]->target) {
/* for each datagram in the message, we need to lookup
* the associated subscription (could be specified by name or id) to find the correct
* callback function. Name specifications are given precedence over id.
*/
subs = (orte_gpr_proxy_subscriber_t**)
(orte_gpr_proxy_globals.subscriptions)->addr;
processed = false;
for (j=0, k=0; !processed &&
k < orte_gpr_proxy_globals.num_subs &&
j < (orte_gpr_proxy_globals.subscriptions)->size; j++) {
if (NULL != subs[j]) {
k++;
if (NULL != data[i]->target) {
/* if target name provided, must use it */
if (NULL != subs[j]->name &&
0 == strcmp(data[i]->target, subs[j]->name)) {
sub = subs[j];
processed = true;
}
} else if (data[i]->id == subs[j]->id) {
/* otherwise, see if id's match */
sub = subs[j];
processed = true;
}
}
}
}
}
/* get here and not processed => not found, abort */
if (!processed) {
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
return ORTE_ERR_NOT_FOUND;
}
OPAL_THREAD_UNLOCK(&orte_gpr_proxy_globals.mutex);
sub->callback(data[i], sub->user_tag);
OPAL_THREAD_LOCK(&orte_gpr_proxy_globals.mutex);
/* get here and not processed => not found, abort */
if (!processed) {
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
return ORTE_ERR_NOT_FOUND;
}
OPAL_THREAD_UNLOCK(&orte_gpr_proxy_globals.mutex);
sub->callback(data[i], sub->user_tag);
OPAL_THREAD_LOCK(&orte_gpr_proxy_globals.mutex);
if (data[i]->remove) {
if (ORTE_SUCCESS != (rc = orte_gpr_proxy_remove_subscription(sub))) {
ORTE_ERROR_LOG(rc);
return rc;
if (data[i]->remove) {
if (ORTE_SUCCESS != (rc = orte_gpr_proxy_remove_subscription(sub))) {
ORTE_ERROR_LOG(rc);
return rc;
}
}
}
}
}

Просмотреть файл

@ -167,7 +167,7 @@ int orte_gpr_proxy_dump_segments(char *segment, int output_id)
return rc;
}
int orte_gpr_proxy_dump_triggers(int output_id)
int orte_gpr_proxy_dump_triggers(orte_gpr_trigger_id_t start, int output_id)
{
orte_gpr_cmd_flag_t command;
orte_buffer_t *cmd;
@ -176,7 +176,7 @@ int orte_gpr_proxy_dump_triggers(int output_id)
size_t n;
if (orte_gpr_proxy_globals.compound_cmd_mode) {
return orte_gpr_base_pack_dump_triggers(orte_gpr_proxy_globals.compound_cmd);
return orte_gpr_base_pack_dump_triggers(orte_gpr_proxy_globals.compound_cmd, start);
}
cmd = OBJ_NEW(orte_buffer_t);
@ -185,7 +185,7 @@ int orte_gpr_proxy_dump_triggers(int output_id)
return ORTE_ERR_OUT_OF_RESOURCE;
}
if (ORTE_SUCCESS != (rc = orte_gpr_base_pack_dump_triggers(cmd))) {
if (ORTE_SUCCESS != (rc = orte_gpr_base_pack_dump_triggers(cmd, start))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(cmd);
return rc;
@ -228,7 +228,7 @@ int orte_gpr_proxy_dump_triggers(int output_id)
return rc;
}
int orte_gpr_proxy_dump_subscriptions(int output_id)
int orte_gpr_proxy_dump_subscriptions(orte_gpr_subscription_id_t start, int output_id)
{
orte_gpr_cmd_flag_t command;
orte_buffer_t *cmd;
@ -237,7 +237,7 @@ int orte_gpr_proxy_dump_subscriptions(int output_id)
size_t n;
if (orte_gpr_proxy_globals.compound_cmd_mode) {
return orte_gpr_base_pack_dump_subscriptions(orte_gpr_proxy_globals.compound_cmd);
return orte_gpr_base_pack_dump_subscriptions(orte_gpr_proxy_globals.compound_cmd, start);
}
cmd = OBJ_NEW(orte_buffer_t);
@ -246,7 +246,7 @@ int orte_gpr_proxy_dump_subscriptions(int output_id)
return ORTE_ERR_OUT_OF_RESOURCE;
}
if (ORTE_SUCCESS != (rc = orte_gpr_base_pack_dump_subscriptions(cmd))) {
if (ORTE_SUCCESS != (rc = orte_gpr_base_pack_dump_subscriptions(cmd, start))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(cmd);
return rc;
@ -289,6 +289,134 @@ int orte_gpr_proxy_dump_subscriptions(int output_id)
return rc;
}
/*
 * Ask the registry replica to dump a single trigger and print the reply.
 *
 * name      - trigger name packed into the request; may be NULL, in which
 *             case the replica's receive handler looks the trigger up by id
 * id        - trigger id packed into the request
 * output_id - output stream handed to orte_gpr_base_print_dump()
 *
 * In compound-command mode the request is only packed into the pending
 * compound command buffer and the status of that pack is returned.
 *
 * Returns ORTE_SUCCESS, ORTE_ERR_OUT_OF_RESOURCE if a buffer cannot be
 * created, ORTE_ERR_COMM_FAILURE on a send/receive failure or an unexpected
 * reply, or the error code from packing, unpacking or printing.
 */
int orte_gpr_proxy_dump_a_trigger(char *name,
                                  orte_gpr_trigger_id_t id,
                                  int output_id)
{
    orte_gpr_cmd_flag_t command;
    orte_buffer_t *cmd;
    orte_buffer_t *answer;
    int rc;
    size_t n;

    if (orte_gpr_proxy_globals.compound_cmd_mode) {
        return orte_gpr_base_pack_dump_a_trigger(orte_gpr_proxy_globals.compound_cmd, name, id);
    }

    cmd = OBJ_NEW(orte_buffer_t);
    if (NULL == cmd) { /* got a problem */
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        return ORTE_ERR_OUT_OF_RESOURCE;
    }

    if (ORTE_SUCCESS != (rc = orte_gpr_base_pack_dump_a_trigger(cmd, name, id))) {
        ORTE_ERROR_LOG(rc);
        OBJ_RELEASE(cmd);
        return rc;
    }

    if (0 > orte_rml.send_buffer(orte_process_info.gpr_replica, cmd, ORTE_RML_TAG_GPR, 0)) {
        ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
        OBJ_RELEASE(cmd);   /* do not leak the request on a failed send */
        return ORTE_ERR_COMM_FAILURE;
    }
    /* the request buffer is no longer needed once it has been sent */
    OBJ_RELEASE(cmd);

    answer = OBJ_NEW(orte_buffer_t);
    if (NULL == answer) {
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        return ORTE_ERR_OUT_OF_RESOURCE;
    }

    if (0 > orte_rml.recv_buffer(orte_process_info.gpr_replica, answer, ORTE_RML_TAG_GPR)) {
        ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
        OBJ_RELEASE(answer);   /* do not leak the reply buffer on a failed receive */
        return ORTE_ERR_COMM_FAILURE;
    }

    n = 1;
    if (ORTE_SUCCESS != (rc = orte_dps.unpack(answer, &command, &n, ORTE_GPR_CMD))) {
        ORTE_ERROR_LOG(rc);
        OBJ_RELEASE(answer);
        return rc;
    }

    /* the replica places ORTE_GPR_DUMP_A_TRIGGER_CMD at the front of its
     * answer to this request, so that is the flag to verify here
     */
    if (ORTE_GPR_DUMP_A_TRIGGER_CMD != command) {
        ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
        OBJ_RELEASE(answer);
        return ORTE_ERR_COMM_FAILURE;
    }

    if (ORTE_SUCCESS != (rc = orte_gpr_base_print_dump(answer, output_id))) {
        ORTE_ERROR_LOG(rc);
    }

    OBJ_RELEASE(answer);
    return rc;
}
/*
 * Ask the registry replica to dump a single subscription and print the reply.
 *
 * name      - subscription name packed into the request; may be NULL, in
 *             which case the id is expected to identify the subscription
 *             (NOTE(review): confirm against the replica receive handler)
 * id        - subscription id packed into the request
 * output_id - output stream handed to orte_gpr_base_print_dump()
 *
 * In compound-command mode the request is only packed into the pending
 * compound command buffer and the status of that pack is returned.
 *
 * Returns ORTE_SUCCESS, ORTE_ERR_OUT_OF_RESOURCE if a buffer cannot be
 * created, ORTE_ERR_COMM_FAILURE on a send/receive failure or an unexpected
 * reply, or the error code from packing, unpacking or printing.
 */
int orte_gpr_proxy_dump_a_subscription(char *name,
                                       orte_gpr_subscription_id_t id,
                                       int output_id)
{
    orte_gpr_cmd_flag_t command;
    orte_buffer_t *cmd;
    orte_buffer_t *answer;
    int rc;
    size_t n;

    if (orte_gpr_proxy_globals.compound_cmd_mode) {
        return orte_gpr_base_pack_dump_a_subscription(orte_gpr_proxy_globals.compound_cmd, name, id);
    }

    cmd = OBJ_NEW(orte_buffer_t);
    if (NULL == cmd) { /* got a problem */
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        return ORTE_ERR_OUT_OF_RESOURCE;
    }

    if (ORTE_SUCCESS != (rc = orte_gpr_base_pack_dump_a_subscription(cmd, name, id))) {
        ORTE_ERROR_LOG(rc);
        OBJ_RELEASE(cmd);
        return rc;
    }

    if (0 > orte_rml.send_buffer(orte_process_info.gpr_replica, cmd, ORTE_RML_TAG_GPR, 0)) {
        ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
        OBJ_RELEASE(cmd);   /* do not leak the request on a failed send */
        return ORTE_ERR_COMM_FAILURE;
    }
    /* the request buffer is no longer needed once it has been sent */
    OBJ_RELEASE(cmd);

    answer = OBJ_NEW(orte_buffer_t);
    if (NULL == answer) {
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        return ORTE_ERR_OUT_OF_RESOURCE;
    }

    if (0 > orte_rml.recv_buffer(orte_process_info.gpr_replica, answer, ORTE_RML_TAG_GPR)) {
        ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
        OBJ_RELEASE(answer);   /* do not leak the reply buffer on a failed receive */
        return ORTE_ERR_COMM_FAILURE;
    }

    n = 1;
    if (ORTE_SUCCESS != (rc = orte_dps.unpack(answer, &command, &n, ORTE_GPR_CMD))) {
        ORTE_ERROR_LOG(rc);
        OBJ_RELEASE(answer);
        return rc;
    }

    /* the replica places ORTE_GPR_DUMP_A_SUBSCRIPTION_CMD at the front of
     * its answer to this request, so that is the flag to verify here
     */
    if (ORTE_GPR_DUMP_A_SUBSCRIPTION_CMD != command) {
        ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
        OBJ_RELEASE(answer);
        return ORTE_ERR_COMM_FAILURE;
    }

    if (ORTE_SUCCESS != (rc = orte_gpr_base_print_dump(answer, output_id))) {
        ORTE_ERROR_LOG(rc);
    }

    OBJ_RELEASE(answer);
    return rc;
}
int orte_gpr_proxy_dump_callbacks(int output_id)
{
orte_gpr_cmd_flag_t command;

Просмотреть файл

@ -35,7 +35,7 @@ int
orte_gpr_proxy_enter_subscription(size_t cnt, orte_gpr_subscription_t **subscriptions)
{
orte_gpr_proxy_subscriber_t *sub;
size_t i, id;
size_t i;
for (i=0; i < cnt; i++) {
sub = OBJ_NEW(orte_gpr_proxy_subscriber_t);
@ -48,7 +48,7 @@ orte_gpr_proxy_enter_subscription(size_t cnt, orte_gpr_subscription_t **subscrip
}
sub->callback = subscriptions[i]->cbfunc;
sub->user_tag = subscriptions[i]->user_tag;
if (0 > orte_pointer_array_add(&id, orte_gpr_proxy_globals.subscriptions, sub)) {
if (0 > orte_pointer_array_add(&sub->index, orte_gpr_proxy_globals.subscriptions, sub)) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
@ -64,10 +64,45 @@ orte_gpr_proxy_enter_subscription(size_t cnt, orte_gpr_subscription_t **subscrip
int
orte_gpr_proxy_enter_trigger(size_t cnt, orte_gpr_trigger_t **trigs)
{
orte_gpr_proxy_trigger_t *trig;
size_t i, id;
orte_gpr_proxy_trigger_t *trig, **tptr;
size_t i, j, k;
for (i=0; i < cnt; i++) {
/* If the provided trigger has a name, see if it already is on
* the local trigger list. If so, then check to see if we
* already defined a return point for it and/or if this trigger
* doesn't - in either of those two cases, we ignore the
* trigger and just use the existing entry
*/
if (NULL != trigs[i]->name) {
tptr = (orte_gpr_proxy_trigger_t**)(orte_gpr_proxy_globals.triggers)->addr;
for (j=0, k=0; k < orte_gpr_proxy_globals.num_trigs &&
j < (orte_gpr_proxy_globals.triggers)->size; j++) {
if (NULL != tptr[j]) {
k++;
if (0 == strcmp(tptr[j]->name, trigs[i]->name)) {
/* same name - trigger is already on list */
if (NULL != tptr[j]->callback || NULL == trigs[i]->cbfunc) {
/* ignore these cases */
trig = tptr[j];
goto MOVEON;
}
/* reach here only if the prior trigger didn't provide
* a callback and the new one does. In this case, we
* update the existing trigger's callback and then
* move on
*/
tptr[j]->callback = trigs[i]->cbfunc;
trig = tptr[j];
goto MOVEON;
}
}
}
}
/* either the trigger doesn't have a name, OR it did, but it isn't
* already on the list - add it to the list now
*/
trig = OBJ_NEW(orte_gpr_proxy_trigger_t);
if (NULL == trig) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
@ -89,13 +124,14 @@ orte_gpr_proxy_enter_trigger(size_t cnt, orte_gpr_trigger_t **trigs)
}
trig->callback = trigs[i]->cbfunc;
trig->user_tag = trigs[i]->user_tag;
if (0 > orte_pointer_array_add(&id, orte_gpr_proxy_globals.triggers, trig)) {
if (0 > orte_pointer_array_add(&trig->index, orte_gpr_proxy_globals.triggers, trig)) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
trig->id = orte_gpr_proxy_globals.num_trigs;
trigs[i]->id = trig->id;
(orte_gpr_proxy_globals.num_trigs)++;
MOVEON:
trigs[i]->id = trig->id;
}
return ORTE_SUCCESS;
@ -103,24 +139,36 @@ orte_gpr_proxy_enter_trigger(size_t cnt, orte_gpr_trigger_t **trigs)
int
orte_gpr_proxy_remove_subscription(orte_gpr_subscription_id_t id)
orte_gpr_proxy_remove_subscription(orte_gpr_proxy_subscriber_t *sub)
{
if (NULL != (orte_gpr_proxy_globals.subscriptions)->addr[id]) {
OBJ_RELEASE((orte_gpr_proxy_globals.subscriptions)->addr[id]);
orte_pointer_array_set_item(orte_gpr_proxy_globals.subscriptions, (size_t)id, NULL);
size_t index;
if (NULL == sub) {
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
return ORTE_ERR_BAD_PARAM;
}
index = sub->index;
OBJ_RELEASE(sub);
orte_pointer_array_set_item(orte_gpr_proxy_globals.subscriptions, index, NULL);
return ORTE_SUCCESS;
}
int
orte_gpr_proxy_remove_trigger(orte_gpr_trigger_id_t id)
orte_gpr_proxy_remove_trigger(orte_gpr_proxy_trigger_t *trig)
{
if (NULL != (orte_gpr_proxy_globals.triggers)->addr[id]) {
OBJ_RELEASE((orte_gpr_proxy_globals.triggers)->addr[id]);
orte_pointer_array_set_item(orte_gpr_proxy_globals.triggers, (size_t)id, NULL);
size_t index;
if (NULL == trig) {
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
return ORTE_ERR_BAD_PARAM;
}
index = trig->index;
OBJ_RELEASE(trig);
orte_pointer_array_set_item(orte_gpr_proxy_globals.triggers, index, NULL);
return ORTE_SUCCESS;
}

Просмотреть файл

@ -45,13 +45,14 @@ orte_gpr_proxy_subscribe(size_t num_subs,
{
orte_buffer_t *cmd;
orte_buffer_t *answer;
orte_gpr_proxy_subscriber_t **subs;
int rc = ORTE_SUCCESS, ret;
size_t i;
/* need to protect against errors */
if (NULL == subscriptions && NULL == trigs) { /* need at least one */
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
return ORTE_ERR_BAD_PARAM;
return ORTE_ERR_BAD_PARAM;
}
OPAL_THREAD_LOCK(&orte_gpr_proxy_globals.mutex);
@ -83,15 +84,15 @@ orte_gpr_proxy_subscribe(size_t num_subs,
* compound cmd buffer and return
*/
if (orte_gpr_proxy_globals.compound_cmd_mode) {
if (ORTE_SUCCESS != (rc = orte_gpr_base_pack_subscribe(orte_gpr_proxy_globals.compound_cmd,
num_subs, subscriptions,
if (ORTE_SUCCESS != (rc = orte_gpr_base_pack_subscribe(orte_gpr_proxy_globals.compound_cmd,
num_subs, subscriptions,
num_trigs, trigs))) {
ORTE_ERROR_LOG(rc);
goto ERROR;
}
/* done */
OPAL_THREAD_UNLOCK(&orte_gpr_proxy_globals.mutex);
OPAL_THREAD_UNLOCK(&orte_gpr_proxy_globals.mutex);
return ORTE_SUCCESS;
}
@ -108,7 +109,7 @@ orte_gpr_proxy_subscribe(size_t num_subs,
num_subs, subscriptions,
num_trigs, trigs))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(cmd);
OBJ_RELEASE(cmd);
goto ERROR;
}
@ -162,8 +163,10 @@ orte_gpr_proxy_subscribe(size_t num_subs,
* numbers are NOT re-used.
*/
ERROR:
subs = (orte_gpr_proxy_subscriber_t**)(orte_gpr_proxy_globals.subscriptions)->addr;
for (i=0; i < num_subs; i++) {
if (ORTE_SUCCESS != (rc = orte_gpr_proxy_remove_subscription(subscriptions[i]->id))) {
/* find the subscription on the local tracker */
if (ORTE_SUCCESS != (rc = orte_gpr_proxy_remove_subscription(subs[subscriptions[i]->id]))) {
ORTE_ERROR_LOG(rc);
OPAL_THREAD_UNLOCK(&orte_gpr_proxy_globals.mutex);
return rc;
@ -179,17 +182,33 @@ int orte_gpr_proxy_unsubscribe(orte_gpr_subscription_id_t sub_number)
{
orte_buffer_t *cmd;
orte_buffer_t *answer;
orte_gpr_proxy_subscriber_t **subs;
size_t i, j;
int rc, ret;
OPAL_THREAD_LOCK(&orte_gpr_proxy_globals.mutex);
/* remove the specified subscription from the local tracker */
if (ORTE_SUCCESS != (rc = orte_gpr_proxy_remove_subscription(sub_number))) {
ORTE_ERROR_LOG(rc);
OPAL_THREAD_UNLOCK(&orte_gpr_proxy_globals.mutex);
return rc;
subs = (orte_gpr_proxy_subscriber_t**)(orte_gpr_proxy_globals.subscriptions)->addr;
for (i=0, j=0; j < orte_gpr_proxy_globals.num_subs &&
i < (orte_gpr_proxy_globals.subscriptions)->size; i++) {
if (NULL != subs[i]){
j++;
if (sub_number == subs[i]->id) {
if (ORTE_SUCCESS != (rc = orte_gpr_proxy_remove_subscription(subs[i]))) {
ORTE_ERROR_LOG(rc);
OPAL_THREAD_UNLOCK(&orte_gpr_proxy_globals.mutex);
return rc;
}
goto PROCESS;
}
}
}
/* must not have been found - report error */
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
return ORTE_ERR_NOT_FOUND;
PROCESS:
/* if in compound cmd mode, then just pack the command into
* that buffer and return
*/
@ -225,7 +244,7 @@ int orte_gpr_proxy_unsubscribe(orte_gpr_subscription_id_t sub_number)
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
OBJ_RELEASE(cmd);
OPAL_THREAD_UNLOCK(&orte_gpr_proxy_globals.mutex);
return ORTE_ERR_COMM_FAILURE;
return ORTE_ERR_COMM_FAILURE;
}
OBJ_RELEASE(cmd);
@ -239,7 +258,7 @@ int orte_gpr_proxy_unsubscribe(orte_gpr_subscription_id_t sub_number)
if (0 > orte_rml.recv_buffer(orte_process_info.gpr_replica, answer, ORTE_RML_TAG_GPR)) {
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
OBJ_RELEASE(answer);
OBJ_RELEASE(answer);
OPAL_THREAD_UNLOCK(&orte_gpr_proxy_globals.mutex);
return ORTE_ERR_COMM_FAILURE;
}
@ -252,14 +271,14 @@ int orte_gpr_proxy_unsubscribe(orte_gpr_subscription_id_t sub_number)
*/
if (ORTE_SUCCESS != (rc = orte_gpr_base_unpack_unsubscribe(answer, &ret))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(answer);
OBJ_RELEASE(answer);
OPAL_THREAD_UNLOCK(&orte_gpr_proxy_globals.mutex);
return rc;
}
OBJ_RELEASE(answer);
OPAL_THREAD_UNLOCK(&orte_gpr_proxy_globals.mutex);
return ret;
return ret;
}
@ -267,17 +286,33 @@ int orte_gpr_proxy_cancel_trigger(orte_gpr_trigger_id_t trig)
{
orte_buffer_t *cmd;
orte_buffer_t *answer;
orte_gpr_proxy_trigger_t **trigs;
size_t i, j;
int rc, ret;
OPAL_THREAD_LOCK(&orte_gpr_proxy_globals.mutex);
/* remove the specified trigger from the local tracker */
if (ORTE_SUCCESS != (rc = orte_gpr_proxy_remove_trigger(trig))) {
ORTE_ERROR_LOG(rc);
OPAL_THREAD_UNLOCK(&orte_gpr_proxy_globals.mutex);
return rc;
trigs = (orte_gpr_proxy_trigger_t**)(orte_gpr_proxy_globals.triggers)->addr;
for (i=0, j=0; j < orte_gpr_proxy_globals.num_trigs &&
i < (orte_gpr_proxy_globals.triggers)->size; i++) {
if (NULL != trigs[i]){
j++;
if (trig == trigs[i]->id) {
if (ORTE_SUCCESS != (rc = orte_gpr_proxy_remove_trigger(trigs[i]))) {
ORTE_ERROR_LOG(rc);
OPAL_THREAD_UNLOCK(&orte_gpr_proxy_globals.mutex);
return rc;
}
goto PROCESS;
}
}
}
/* must not have been found - report error */
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
return ORTE_ERR_NOT_FOUND;
PROCESS:
/* if the compound cmd mode is on, pack the command into that buffer
* and return
*/

Просмотреть файл

@ -18,13 +18,14 @@ include $(top_ompi_srcdir)/config/Makefile.options
noinst_LTLIBRARIES = libmca_gpr_replica_api.la
libmca_gpr_replica_api_la_SOURCES = \
gpr_replica_api.h \
gpr_replica_segment_ops_api.c \
gpr_replica_cleanup_api.c \
gpr_replica_compound_cmd_api.c \
gpr_replica_del_index_api.c \
gpr_replica_dump_api.c \
gpr_replica_dump_local_trigs_subs_api.c \
gpr_replica_arithmetic_ops_api.c \
gpr_replica_put_get_api.c \
gpr_replica_subscribe_api.c
gpr_replica_api.h \
gpr_replica_segment_ops_api.c \
gpr_replica_cleanup_api.c \
gpr_replica_compound_cmd_api.c \
gpr_replica_del_index_api.c \
gpr_replica_deliver_notify_msg_api.c \
gpr_replica_dump_api.c \
gpr_replica_dump_local_trigs_subs_api.c \
gpr_replica_arithmetic_ops_api.c \
gpr_replica_put_get_api.c \
gpr_replica_subscribe_api.c

Просмотреть файл

@ -129,9 +129,19 @@ int orte_gpr_replica_dump_all(int output_id);
int orte_gpr_replica_dump_segments(char *segment, int output_id);
int orte_gpr_replica_dump_triggers(int output_id);
int orte_gpr_replica_dump_triggers(orte_gpr_trigger_id_t start, int output_id);
int orte_gpr_replica_dump_subscriptions(int output_id);
int orte_gpr_replica_dump_subscriptions(orte_gpr_subscription_id_t start, int output_id);
int orte_gpr_replica_dump_a_trigger(
char *name,
orte_gpr_trigger_id_t id,
int output_id);
int orte_gpr_replica_dump_a_subscription(
char *name,
orte_gpr_subscription_id_t id,
int output_id);
int orte_gpr_replica_dump_local_triggers(int output_id);
@ -150,6 +160,8 @@ int orte_gpr_replica_dump_value(orte_gpr_value_t *value, int output_id);
*/
int orte_gpr_replica_preallocate_segment(char *name, size_t num_slots);
int orte_gpr_replica_deliver_notify_msg(orte_gpr_notify_message_t *msg);
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif

Просмотреть файл

@ -35,16 +35,15 @@ int orte_gpr_replica_deliver_notify_msg(orte_gpr_notify_message_t *msg)
orte_gpr_notify_data_t **data;
orte_gpr_replica_local_trigger_t **local_trigs;
orte_gpr_replica_local_subscriber_t **local_subs, *sub;
size_t i, j, k;
size_t i, j, k, n;
int rc;
bool processed;
/* we first have to check the trigger id in the message. If that
* field is set to a valid value (i.e., one other than
* ORTE_GPR_TRIGGER_ID_MAX), then the message is intended to be
/* we first have to check if the message is a trigger message - if so,
* then the message is intended to be
* sent as a single block to that trigger's callback function.
*/
if (ORTE_GPR_TRIGGER_ID_MAX > msg->id) {
if (ORTE_GPR_TRIGGER_MSG == msg->msg_type) {
/* use the local trigger callback */
local_trigs = (orte_gpr_replica_local_trigger_t**)
(orte_gpr_replica_globals.local_triggers)->addr;
@ -69,49 +68,68 @@ int orte_gpr_replica_deliver_notify_msg(orte_gpr_notify_message_t *msg)
return ORTE_SUCCESS;
}
/* get here if the trigger id indicated that this was NOT
* intended for a trigger callback - i.e., the message should
/* get here if this wasn't a trigger message. Only other allowed message type
* is a subscription message - if that isn't the case, then we have corrupt
* data, so flag it and return
*/
if (ORTE_GPR_SUBSCRIPTION_MSG != msg->msg_type) {
ORTE_ERROR_LOG(ORTE_ERR_GPR_DATA_CORRUPT);
return ORTE_ERR_GPR_DATA_CORRUPT;
}
/* get here if we have a subscription message - i.e., the message should
* be broken into its component parts and delivered separately
* to the indicated subscribers
*/
data = (orte_gpr_notify_data_t**)(msg->data)->addr;
for (i=0; i < msg->cnt; i++) {
/* for each datagram in the message, we need to lookup
* the associated subscription (could be specified by name or id) to find the correct
* callback function. Name specifications are given precedence over id.
*/
local_subs = (orte_gpr_replica_local_subscriber_t**)
(orte_gpr_replica_globals.local_subscriptions)->addr;
processed = false;
for (j=0, k=0; !processed &&
k < orte_gpr_replica_globals.num_local_subs &&
j < (orte_gpr_replica_globals.local_subscriptions)->size; j++) {
if (NULL != local_subs[j]) {
k++;
if ((NULL != local_subs[j]->name &&
NULL != data[i]->target &&
0 == strcmp(data[i]->target, local_subs[j]->name)) ||
(data[i]->id == local_subs[j]->id)) {
sub = local_subs[j];
processed = true;
for (i=0, n=0; n < msg->cnt &&
i < (msg->data)->size; i++) {
if (NULL != data[i]) {
n++;
if (ORTE_GPR_SUBSCRIPTION_ID_MAX != data[i]->id || NULL != data[i]->target) {
/* for each datagram in the message, we need to lookup
* the associated subscription (could be specified by name or id) to find the correct
* callback function. Name specifications are given precedence over id.
*/
local_subs = (orte_gpr_replica_local_subscriber_t**)
(orte_gpr_replica_globals.local_subscriptions)->addr;
processed = false;
for (j=0, k=0; !processed &&
k < orte_gpr_replica_globals.num_local_subs &&
j < (orte_gpr_replica_globals.local_subscriptions)->size; j++) {
if (NULL != local_subs[j]) {
k++;
if (NULL != data[i]->target) {
/* if target name provided, must use it */
if (NULL != local_subs[j]->name &&
0 == strcmp(data[i]->target, local_subs[j]->name)) {
sub = local_subs[j];
processed = true;
}
} else if (data[i]->id == local_subs[j]->id) {
/* otherwise, see if id's match */
sub = local_subs[j];
processed = true;
}
}
}
}
}
/* get here and not processed => not found, abort */
if (!processed) {
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
return ORTE_ERR_NOT_FOUND;
}
/* get here and not processed => not found, abort */
if (!processed) {
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
return ORTE_ERR_NOT_FOUND;
}
OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex);
sub->callback(data[i], sub->user_tag);
OPAL_THREAD_LOCK(&orte_gpr_replica_globals.mutex);
OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex);
sub->callback(data[i], sub->user_tag);
OPAL_THREAD_LOCK(&orte_gpr_replica_globals.mutex);
if (data[i]->remove) {
if (ORTE_SUCCESS != (rc = orte_gpr_replica_remove_local_subscription(sub))) {
ORTE_ERROR_LOG(rc);
return rc;
if (data[i]->remove) {
if (ORTE_SUCCESS != (rc = orte_gpr_replica_remove_local_subscription(sub))) {
ORTE_ERROR_LOG(rc);
return rc;
}
}
}
}
}

Просмотреть файл

@ -98,7 +98,7 @@ int orte_gpr_replica_dump_segments(char *segment, int output_id)
return rc;
}
int orte_gpr_replica_dump_triggers(int output_id)
int orte_gpr_replica_dump_triggers(orte_gpr_trigger_id_t start, int output_id)
{
orte_buffer_t *buffer;
int rc;
@ -116,7 +116,7 @@ int orte_gpr_replica_dump_triggers(int output_id)
return ORTE_ERR_OUT_OF_RESOURCE;
}
if (ORTE_SUCCESS != (rc = orte_gpr_replica_dump_triggers_fn(buffer))) {
if (ORTE_SUCCESS != (rc = orte_gpr_replica_dump_triggers_fn(buffer, start))) {
ORTE_ERROR_LOG(rc);
}
@ -130,7 +130,7 @@ int orte_gpr_replica_dump_triggers(int output_id)
return rc;
}
int orte_gpr_replica_dump_subscriptions(int output_id)
int orte_gpr_replica_dump_subscriptions(orte_gpr_subscription_id_t start, int output_id)
{
orte_buffer_t *buffer;
int rc;
@ -143,7 +143,7 @@ int orte_gpr_replica_dump_subscriptions(int output_id)
return ORTE_ERR_OUT_OF_RESOURCE;
}
if (ORTE_SUCCESS != (rc = orte_gpr_replica_dump_subscriptions_fn(buffer))) {
if (ORTE_SUCCESS != (rc = orte_gpr_replica_dump_subscriptions_fn(buffer, start))) {
ORTE_ERROR_LOG(rc);
}
@ -157,6 +157,133 @@ int orte_gpr_replica_dump_subscriptions(int output_id)
return rc;
}
/*
 * Dump a single trigger held by the replica to the given output stream.
 *
 * name      - name of the trigger to dump; when NULL the trigger is
 *             selected by comparing id against each trigger's index
 * id        - trigger id, used only when name is NULL
 * output_id - output stream handed to orte_gpr_base_print_dump()
 *
 * The replica mutex is held for the duration of the call.
 *
 * Returns ORTE_SUCCESS, ORTE_ERR_NOT_FOUND if no trigger matches, or the
 * error code from building or printing the dump.
 */
int orte_gpr_replica_dump_a_trigger(
                            char *name,
                            orte_gpr_trigger_id_t id,
                            int output_id)
{
    orte_buffer_t buffer;
    orte_gpr_replica_trigger_t **trigs;
    size_t i, j;
    int rc;
    int match;

    OPAL_THREAD_LOCK(&orte_gpr_replica_globals.mutex);

    OBJ_CONSTRUCT(&buffer, orte_buffer_t);

    /* the array may contain holes - j counts the live entries seen so far
     * so the scan can stop once every trigger has been examined
     */
    trigs = (orte_gpr_replica_trigger_t**)(orte_gpr_replica.triggers)->addr;
    for (i=0, j=0; j < orte_gpr_replica.num_trigs &&
                   i < (orte_gpr_replica.triggers)->size; i++) {
        if (NULL == trigs[i]) {
            continue;
        }
        j++;

        if (NULL == name) {
            /* no name given - select by id */
            match = (id == trigs[i]->index);
        } else {
            /* a stored trigger may be unnamed, so guard the strcmp */
            match = (NULL != trigs[i]->name &&
                     0 == strcmp(name, trigs[i]->name));
        }
        if (!match) {
            continue;
        }

        if (ORTE_SUCCESS != (rc = orte_gpr_replica_dump_trigger(&buffer, trigs[i]))) {
            ORTE_ERROR_LOG(rc);
        } else if (ORTE_SUCCESS != (rc = orte_gpr_base_print_dump(&buffer, output_id))) {
            /* report a failed print rather than silently claiming success */
            ORTE_ERROR_LOG(rc);
        }
        goto CLEANUP;
    }

    /* scanned every live trigger without a match */
    ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
    rc = ORTE_ERR_NOT_FOUND;

CLEANUP:
    OBJ_DESTRUCT(&buffer);
    OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex);

    return rc;
}
/*
 * Dump a single subscription held by the replica to the given output stream.
 *
 * name      - name of the subscription to dump; when NULL the subscription
 *             is selected by comparing id against each subscription's index
 * id        - subscription id, used only when name is NULL
 * output_id - output stream handed to orte_gpr_base_print_dump()
 *
 * The replica mutex is held for the duration of the call.
 *
 * Returns ORTE_SUCCESS, ORTE_ERR_NOT_FOUND if no subscription matches, or
 * the error code from building or printing the dump.
 */
int orte_gpr_replica_dump_a_subscription(char *name,
                                         orte_gpr_subscription_id_t id,
                                         int output_id)
{
    orte_buffer_t buffer;
    orte_gpr_replica_subscription_t **subs;
    size_t i, j;
    int rc;
    int match;

    OPAL_THREAD_LOCK(&orte_gpr_replica_globals.mutex);

    OBJ_CONSTRUCT(&buffer, orte_buffer_t);

    /* the array may contain holes - j counts the live entries seen so far
     * so the scan can stop once every subscription has been examined
     */
    subs = (orte_gpr_replica_subscription_t**)(orte_gpr_replica.subscriptions)->addr;
    for (i=0, j=0; j < orte_gpr_replica.num_subs &&
                   i < (orte_gpr_replica.subscriptions)->size; i++) {
        if (NULL == subs[i]) {
            continue;
        }
        j++;

        if (NULL == name) {
            /* no name given - select by id */
            match = (id == subs[i]->index);
        } else {
            /* a stored subscription may be unnamed, so guard the strcmp */
            match = (NULL != subs[i]->name &&
                     0 == strcmp(name, subs[i]->name));
        }
        if (!match) {
            continue;
        }

        if (ORTE_SUCCESS != (rc = orte_gpr_replica_dump_subscription(&buffer, subs[i]))) {
            ORTE_ERROR_LOG(rc);
        } else if (ORTE_SUCCESS != (rc = orte_gpr_base_print_dump(&buffer, output_id))) {
            /* report a failed print rather than silently claiming success */
            ORTE_ERROR_LOG(rc);
        }
        goto CLEANUP;
    }

    /* scanned every live subscription without a match */
    ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
    rc = ORTE_ERR_NOT_FOUND;

CLEANUP:
    OBJ_DESTRUCT(&buffer);
    OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex);

    return rc;
}
int orte_gpr_replica_dump_callbacks(int output_id)
{
orte_buffer_t *buffer;

Просмотреть файл

@ -53,6 +53,11 @@ int orte_gpr_replica_dump_local_triggers(int output_id)
} else {
opal_output(output_id, "\ttrigger name: %s", trigs[j]->name);
}
if (NULL == trigs[j]->callback) {
opal_output(output_id, "\tNULL callback");
} else {
opal_output(output_id, "\tCallback %0x", trigs[j]->callback);
}
}
}
return ORTE_SUCCESS;
@ -78,7 +83,11 @@ int orte_gpr_replica_dump_local_subscriptions(int output_id)
} else {
opal_output(output_id, "\tsubscription name: %s", subs[j]->name);
}
}
if (NULL == subs[j]->callback) {
opal_output(output_id, "\tNULL callback");
} else {
opal_output(output_id, "\tCallback %0x", subs[j]->callback);
} }
}
return ORTE_SUCCESS;
}

Просмотреть файл

@ -25,10 +25,10 @@
#include "orte_config.h"
#include "dps/dps.h"
#include "orte/dps/dps.h"
#include "mca/ns/ns.h"
#include "mca/errmgr/errmgr.h"
#include "orte/mca/ns/ns.h"
#include "orte/mca/errmgr/errmgr.h"
#include "gpr_replica_api.h"
@ -43,7 +43,7 @@ orte_gpr_replica_subscribe(size_t num_subs,
/* protect against errors */
if (NULL == subscriptions && NULL == trigs) { /* need at least one */
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
return ORTE_ERR_BAD_PARAM;
return ORTE_ERR_BAD_PARAM;
}
OPAL_THREAD_LOCK(&orte_gpr_replica_globals.mutex);
@ -99,11 +99,31 @@ orte_gpr_replica_subscribe(size_t num_subs,
int orte_gpr_replica_unsubscribe(orte_gpr_subscription_id_t sub_number)
{
orte_gpr_replica_local_subscriber_t **subs;
size_t i, j;
int rc;
OPAL_THREAD_LOCK(&orte_gpr_replica_globals.mutex);
rc = orte_gpr_replica_remove_subscription(NULL, sub_number);
if (ORTE_SUCCESS != (rc = orte_gpr_replica_remove_subscription(NULL, sub_number))) {
ORTE_ERROR_LOG(rc);
}
if (ORTE_SUCCESS == rc) {
/* find and remove it from the local subscription tracking system */
subs = (orte_gpr_replica_local_subscriber_t**)(orte_gpr_replica_globals.local_subscriptions)->addr;
for (i=0, j=0; j < orte_gpr_replica_globals.num_local_subs &&
i < (orte_gpr_replica_globals.local_subscriptions)->size; i++) {
if (NULL != subs[i]) {
j++;
if (sub_number == subs[i]->id) {
if (ORTE_SUCCESS != (rc = orte_gpr_replica_remove_local_subscription(subs[i]))) {
ORTE_ERROR_LOG(rc);
}
}
}
}
}
OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex);
@ -113,12 +133,30 @@ int orte_gpr_replica_unsubscribe(orte_gpr_subscription_id_t sub_number)
int orte_gpr_replica_cancel_trigger(orte_gpr_trigger_id_t trig)
{
orte_gpr_replica_local_trigger_t **trigs;
size_t i, j;
int rc;
OPAL_THREAD_LOCK(&orte_gpr_replica_globals.mutex);
rc = orte_gpr_replica_remove_trigger(NULL, trig);
if (ORTE_SUCCESS == rc) {
/* find and remove it from the local trigger tracking system */
trigs = (orte_gpr_replica_local_trigger_t**)(orte_gpr_replica_globals.local_triggers)->addr;
for (i=0, j=0; j < orte_gpr_replica_globals.num_local_trigs &&
i < (orte_gpr_replica_globals.local_triggers)->size; i++) {
if (NULL != trigs[i]) {
j++;
if (trig == trigs[i]->id) {
if (ORTE_SUCCESS != (rc = orte_gpr_replica_remove_local_trigger(trigs[i]))) {
ORTE_ERROR_LOG(rc);
}
}
}
}
}
OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex);
return rc;

Просмотреть файл

@ -218,7 +218,7 @@ int orte_gpr_replica_process_command_buffer(orte_buffer_t *input_buffer,
opal_output(0, "\tdump triggers cmd");
}
if (ORTE_SUCCESS != (ret = orte_gpr_replica_recv_dump_triggers_cmd(answer))) {
if (ORTE_SUCCESS != (ret = orte_gpr_replica_recv_dump_triggers_cmd(input_buffer, answer))) {
ORTE_ERROR_LOG(ret);
goto RETURN_ERROR;
}
@ -232,7 +232,7 @@ int orte_gpr_replica_process_command_buffer(orte_buffer_t *input_buffer,
opal_output(0, "\tdump subscriptions cmd");
}
if (ORTE_SUCCESS != (ret = orte_gpr_replica_recv_dump_subscriptions_cmd(answer))) {
if (ORTE_SUCCESS != (ret = orte_gpr_replica_recv_dump_subscriptions_cmd(input_buffer, answer))) {
ORTE_ERROR_LOG(ret);
goto RETURN_ERROR;
}
@ -240,6 +240,32 @@ int orte_gpr_replica_process_command_buffer(orte_buffer_t *input_buffer,
case ORTE_GPR_DUMP_A_TRIGGER_CMD: /***** DUMP *****/
if (orte_gpr_replica_globals.debug) {
opal_output(0, "\tdump a trigger cmd");
}
if (ORTE_SUCCESS != (ret = orte_gpr_replica_recv_dump_a_trigger_cmd(input_buffer, answer))) {
ORTE_ERROR_LOG(ret);
goto RETURN_ERROR;
}
break;
case ORTE_GPR_DUMP_A_SUBSCRIPTION_CMD: /***** DUMP *****/
if (orte_gpr_replica_globals.debug) {
opal_output(0, "\tdump a subscription cmd");
}
if (ORTE_SUCCESS != (ret = orte_gpr_replica_recv_dump_a_subscription_cmd(input_buffer, answer))) {
ORTE_ERROR_LOG(ret);
goto RETURN_ERROR;
}
break;
case ORTE_GPR_DUMP_CALLBACKS_CMD: /***** DUMP *****/
if (orte_gpr_replica_globals.debug) {

Просмотреть файл

@ -111,9 +111,17 @@ int orte_gpr_replica_recv_dump_all_cmd(orte_buffer_t *answer);
int orte_gpr_replica_recv_dump_segments_cmd(orte_buffer_t *input_buffer, orte_buffer_t *answer);
int orte_gpr_replica_recv_dump_triggers_cmd(orte_buffer_t *answer);
int orte_gpr_replica_recv_dump_triggers_cmd(orte_buffer_t *input_buffer,
orte_buffer_t *answer);
int orte_gpr_replica_recv_dump_subscriptions_cmd(orte_buffer_t *answer);
int orte_gpr_replica_recv_dump_subscriptions_cmd(orte_buffer_t *input_buffer,
orte_buffer_t *answer);
int orte_gpr_replica_recv_dump_a_trigger_cmd(orte_buffer_t *input_buffer,
orte_buffer_t *answer);
int orte_gpr_replica_recv_dump_a_subscription_cmd(orte_buffer_t *input_buffer,
orte_buffer_t *answer);
int orte_gpr_replica_recv_dump_callbacks_cmd(orte_buffer_t *answer);

Просмотреть файл

@ -74,9 +74,12 @@ int orte_gpr_replica_recv_dump_segments_cmd(orte_buffer_t *input_buffer, orte_bu
return rc;
}
int orte_gpr_replica_recv_dump_triggers_cmd(orte_buffer_t *answer)
int orte_gpr_replica_recv_dump_triggers_cmd(orte_buffer_t *input_buffer,
orte_buffer_t *answer)
{
orte_gpr_cmd_flag_t command=ORTE_GPR_DUMP_TRIGGERS_CMD;
orte_gpr_trigger_id_t start;
size_t n;
int rc;
if (ORTE_SUCCESS != (rc = orte_dps.pack(answer, &command, 1, ORTE_GPR_CMD))) {
@ -84,7 +87,13 @@ int orte_gpr_replica_recv_dump_triggers_cmd(orte_buffer_t *answer)
return rc;
}
rc = orte_gpr_replica_dump_triggers_fn(answer);
n=1;
if (ORTE_SUCCESS != (rc = orte_dps.unpack(input_buffer, &start, &n, ORTE_GPR_TRIGGER_ID))) {
ORTE_ERROR_LOG(rc);
return rc;
}
rc = orte_gpr_replica_dump_triggers_fn(answer, start);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
@ -92,9 +101,12 @@ int orte_gpr_replica_recv_dump_triggers_cmd(orte_buffer_t *answer)
return rc;
}
int orte_gpr_replica_recv_dump_subscriptions_cmd(orte_buffer_t *answer)
int orte_gpr_replica_recv_dump_subscriptions_cmd(orte_buffer_t *input_buffer,
orte_buffer_t *answer)
{
orte_gpr_cmd_flag_t command=ORTE_GPR_DUMP_SUBSCRIPTIONS_CMD;
orte_gpr_subscription_id_t start;
size_t n;
int rc;
if (ORTE_SUCCESS != (rc = orte_dps.pack(answer, &command, 1, ORTE_GPR_CMD))) {
@ -102,7 +114,13 @@ int orte_gpr_replica_recv_dump_subscriptions_cmd(orte_buffer_t *answer)
return rc;
}
rc = orte_gpr_replica_dump_subscriptions_fn(answer);
n=1;
if (ORTE_SUCCESS != (rc = orte_dps.unpack(input_buffer, &start, &n, ORTE_GPR_SUBSCRIPTION_ID))) {
ORTE_ERROR_LOG(rc);
return rc;
}
rc = orte_gpr_replica_dump_subscriptions_fn(answer, start);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
@ -110,6 +128,135 @@ int orte_gpr_replica_recv_dump_subscriptions_cmd(orte_buffer_t *answer)
return rc;
}
/*
 * Service a "dump a trigger" command received by the replica.
 *
 * input_buffer - request buffer; a trigger name (string) and a trigger id
 *                are unpacked from it, in that order
 * answer       - reply buffer; the ORTE_GPR_DUMP_A_TRIGGER_CMD flag is packed
 *                first, followed by the dump of the matching trigger
 *
 * When the unpacked name is NULL the trigger is selected by comparing the id
 * against each trigger's index; otherwise it is selected by name.
 *
 * Returns ORTE_SUCCESS, ORTE_ERR_NOT_FOUND if no trigger matches, or the
 * error code from packing, unpacking or dumping.
 */
int orte_gpr_replica_recv_dump_a_trigger_cmd(orte_buffer_t *input_buffer,
                                             orte_buffer_t *answer)
{
    orte_gpr_cmd_flag_t command=ORTE_GPR_DUMP_A_TRIGGER_CMD;
    char *name = NULL;
    orte_gpr_trigger_id_t id;
    orte_gpr_replica_trigger_t **trigs;
    size_t n, i, j;
    int rc;
    int match;

    if (ORTE_SUCCESS != (rc = orte_dps.pack(answer, &command, 1, ORTE_GPR_CMD))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    /* NOTE(review): the unpacked string is presumably allocated by the
     * unpack routine and is never released here - confirm ownership and
     * free it on every exit path if so
     */
    n=1;
    if (ORTE_SUCCESS != (rc = orte_dps.unpack(input_buffer, &name, &n, ORTE_STRING))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    n=1;
    if (ORTE_SUCCESS != (rc = orte_dps.unpack(input_buffer, &id, &n, ORTE_GPR_TRIGGER_ID))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    /* the array may contain holes - j counts the live entries seen so far
     * so the scan can stop once every trigger has been examined
     */
    trigs = (orte_gpr_replica_trigger_t**)(orte_gpr_replica.triggers)->addr;
    for (i=0, j=0; j < orte_gpr_replica.num_trigs &&
                   i < (orte_gpr_replica.triggers)->size; i++) {
        if (NULL == trigs[i]) {
            continue;
        }
        j++;

        if (NULL == name) {
            /* no name given - select by id */
            match = (id == trigs[i]->index);
        } else {
            /* a stored trigger may be unnamed, so guard the strcmp */
            match = (NULL != trigs[i]->name &&
                     0 == strcmp(name, trigs[i]->name));
        }
        if (!match) {
            continue;
        }

        if (ORTE_SUCCESS != (rc = orte_gpr_replica_dump_trigger(answer, trigs[i]))) {
            ORTE_ERROR_LOG(rc);
        }
        return rc;
    }

    /* no match: return the error that is logged, not the ORTE_SUCCESS left
     * in rc by the last successful unpack
     */
    ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
    return ORTE_ERR_NOT_FOUND;
}
/*
 * Handle a "dump a subscription" command.
 *
 * Packs the command flag into the answer buffer, then unpacks a
 * subscription name (string) and a subscription id from the input buffer.
 * If the unpacked name is NULL the subscription is selected by comparing
 * the id against each subscription's index; otherwise it is selected by
 * name. The first match is written to the answer buffer via
 * orte_gpr_replica_dump_subscription().
 *
 * @param input_buffer  buffer holding the packed name and id
 * @param answer        buffer that receives the command flag and the dump
 *
 * @return ORTE_SUCCESS if a subscription was found and dumped, the
 *         pack/unpack or dump error code on failure, or ORTE_ERR_NOT_FOUND
 *         if no subscription matched (for both the id and the name lookup).
 */
int orte_gpr_replica_recv_dump_a_subscription_cmd(orte_buffer_t *input_buffer,
                                                  orte_buffer_t *answer)
{
    orte_gpr_cmd_flag_t command=ORTE_GPR_DUMP_A_SUBSCRIPTION_CMD;
    orte_gpr_replica_subscription_t **subs;
    orte_gpr_subscription_id_t id;
    size_t n, i, j;
    char *name = NULL;
    int rc;

    if (ORTE_SUCCESS != (rc = orte_dps.pack(answer, &command, 1, ORTE_GPR_CMD))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    n=1;
    if (ORTE_SUCCESS != (rc = orte_dps.unpack(input_buffer, &name, &n, ORTE_STRING))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    n=1;
    if (ORTE_SUCCESS != (rc = orte_dps.unpack(input_buffer, &id, &n, ORTE_GPR_SUBSCRIPTION_ID))) {
        ORTE_ERROR_LOG(rc);
        goto CLEANUP;
    }

    /* assume "not found" until a subscription matches, so that both
     * lookup modes report a miss the same way
     */
    rc = ORTE_ERR_NOT_FOUND;
    subs = (orte_gpr_replica_subscription_t**)(orte_gpr_replica.subscriptions)->addr;
    for (i=0, j=0; j < orte_gpr_replica.num_subs &&
                   i < (orte_gpr_replica.subscriptions)->size; i++) {
        if (NULL == subs[i]) {
            continue;
        }
        j++;
        if (NULL == name) {
            /* no name given - select by the provided id */
            if (id != subs[i]->index) {
                continue;
            }
        } else {
            /* select by name - subscriptions may be unnamed, so never
             * hand a NULL name to strcmp
             */
            if (NULL == subs[i]->name || 0 != strcmp(name, subs[i]->name)) {
                continue;
            }
        }
        rc = orte_gpr_replica_dump_subscription(answer, subs[i]);
        break;
    }

    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(rc);
    }

CLEANUP:
    /* the unpacked name string belongs to us on every path, not only
     * when a match was found
     */
    free(name);
    return rc;
}
int orte_gpr_replica_recv_dump_callbacks_cmd(orte_buffer_t *answer)
{
orte_gpr_cmd_flag_t command=ORTE_GPR_DUMP_CALLBACKS_CMD;

Просмотреть файл

@ -43,11 +43,6 @@ static void orte_gpr_replica_dump_load_string(orte_buffer_t *buffer, char **tmp)
void orte_gpr_replica_dump_itagval_value(orte_buffer_t *buffer,
orte_gpr_replica_itagval_t *iptr);
static void orte_gpr_replica_dump_trigger(orte_buffer_t *buffer, size_t cnt,
orte_gpr_replica_trigger_t *trig);
static void orte_gpr_replica_dump_subscription(orte_buffer_t *buffer,
orte_gpr_replica_subscription_t *sub);
int orte_gpr_replica_dump_all_fn(orte_buffer_t *buffer)
{
@ -58,11 +53,11 @@ int orte_gpr_replica_dump_all_fn(orte_buffer_t *buffer)
sprintf(tmp_out, "\n\n\nDUMP OF GENERAL PURPOSE REGISTRY");
orte_gpr_replica_dump_load_string(buffer, &tmp);
if (ORTE_SUCCESS != (rc = orte_gpr_replica_dump_triggers_fn(buffer))) {
if (ORTE_SUCCESS != (rc = orte_gpr_replica_dump_triggers_fn(buffer, 0))) {
return rc;
}
if (ORTE_SUCCESS != (rc = orte_gpr_replica_dump_subscriptions_fn(buffer))) {
if (ORTE_SUCCESS != (rc = orte_gpr_replica_dump_subscriptions_fn(buffer, 0))) {
return rc;
}
@ -339,11 +334,13 @@ int orte_gpr_replica_dump_callbacks_fn(orte_buffer_t *buffer)
}
int orte_gpr_replica_dump_triggers_fn(orte_buffer_t *buffer)
int orte_gpr_replica_dump_triggers_fn(orte_buffer_t *buffer,
orte_gpr_trigger_id_t start)
{
orte_gpr_replica_trigger_t **trig;
char tmp_out[100], *tmp;
size_t j, k;
size_t j, k, m;
int rc;
tmp = tmp_out;
sprintf(tmp_out, "\nDUMP OF GPR TRIGGERS\n");
@ -354,10 +351,21 @@ int orte_gpr_replica_dump_triggers_fn(orte_buffer_t *buffer)
orte_gpr_replica_dump_load_string(buffer, &tmp);
/* dump the trigger info for the registry */
if (0 == start) { /* dump the whole thing */
m = 0;
} else {
m = orte_gpr_replica.num_trigs - start;
}
for (j=0, k=0; k < orte_gpr_replica.num_trigs &&
j < (orte_gpr_replica.triggers)->size; j++) {
if (NULL != trig[j]) {
orte_gpr_replica_dump_trigger(buffer, k, trig[j]);
if (k >= m) {
if (ORTE_SUCCESS != (rc = orte_gpr_replica_dump_trigger(buffer, trig[j]))) {
ORTE_ERROR_LOG(rc);
return rc;
}
}
k++;
}
}
@ -365,8 +373,8 @@ int orte_gpr_replica_dump_triggers_fn(orte_buffer_t *buffer)
return ORTE_SUCCESS;
}
static void orte_gpr_replica_dump_trigger(orte_buffer_t *buffer, size_t cnt,
orte_gpr_replica_trigger_t *trig)
int orte_gpr_replica_dump_trigger(orte_buffer_t *buffer,
orte_gpr_replica_trigger_t *trig)
{
char *tmp_out, *token;
size_t i, j;
@ -377,7 +385,7 @@ static void orte_gpr_replica_dump_trigger(orte_buffer_t *buffer, size_t cnt,
tmp_out = (char*)malloc(1000);
if (NULL == tmp_out) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return;
return ORTE_ERR_OUT_OF_RESOURCE;
}
sprintf(tmp_out, "\nData for trigger %lu", (unsigned long) trig->index);
@ -501,14 +509,16 @@ static void orte_gpr_replica_dump_trigger(orte_buffer_t *buffer, size_t cnt,
}
free(tmp_out);
return;
return ORTE_SUCCESS;
}
int orte_gpr_replica_dump_subscriptions_fn(orte_buffer_t *buffer)
int orte_gpr_replica_dump_subscriptions_fn(orte_buffer_t *buffer,
orte_gpr_subscription_id_t start)
{
char *tmp_out, *tmp;
size_t i, m;
size_t i, m, n;
orte_gpr_replica_subscription_t **subs;
int rc;
tmp_out = (char*)malloc(1000);
if (NULL == tmp_out) {
@ -525,19 +535,30 @@ int orte_gpr_replica_dump_subscriptions_fn(orte_buffer_t *buffer)
orte_gpr_replica_dump_load_string(buffer, &tmp);
/* dump the subscription info for the registry */
if (0 == start) { /* dump the whole thing */
n = 0;
} else {
n = orte_gpr_replica.num_subs - start;
}
for (i=0, m=0; m < orte_gpr_replica.num_subs &&
i < (orte_gpr_replica.subscriptions)->size; i++) {
if (NULL != subs[i]) {
if (m >= n) {
if (ORTE_SUCCESS != (rc = orte_gpr_replica_dump_subscription(buffer, subs[i]))) {
ORTE_ERROR_LOG(rc);
return rc;
}
}
m++;
orte_gpr_replica_dump_subscription(buffer, subs[i]);
}
}
free(tmp_out);
return ORTE_SUCCESS;
}
static void orte_gpr_replica_dump_subscription(orte_buffer_t *buffer,
orte_gpr_replica_subscription_t *sub)
int orte_gpr_replica_dump_subscription(orte_buffer_t *buffer,
orte_gpr_replica_subscription_t *sub)
{
char *tmp_out, *token, *tmp;
size_t j, k, n, p;
@ -547,17 +568,17 @@ static void orte_gpr_replica_dump_subscription(orte_buffer_t *buffer,
tmp_out = (char*)malloc(1000);
if (NULL == tmp_out) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return;
return ORTE_ERR_OUT_OF_RESOURCE;
}
tmp = tmp_out;
if (NULL == sub->name) {
sprintf(tmp, "\nSubscription %lu: UNNAMED",
(unsigned long) sub->index);
sprintf(tmp, "\nSubscription %lu: UNNAMED idtag %lu",
(unsigned long) sub->index, (unsigned long) sub->idtag);
} else {
sprintf(tmp, "\nSubscription %lu name %s",
sprintf(tmp, "\nSubscription %lu: name %s idtag %lu",
(unsigned long) sub->index,
sub->name);
sub->name, (unsigned long) sub->idtag);
}
orte_gpr_replica_dump_load_string(buffer, &tmp);
@ -652,10 +673,10 @@ static void orte_gpr_replica_dump_subscription(orte_buffer_t *buffer,
if (ORTE_SUCCESS == orte_gpr_replica_dict_reverse_lookup(&token, ivals[n]->seg,
ORTE_VALUE_ARRAY_GET_ITEM(&(ivals[n]->tokentags), orte_gpr_replica_itag_t, j))) {
sprintf(tmp_out, "\t\t\tToken: %s", token);
orte_gpr_replica_dump_load_string(buffer, &tmp_out);
orte_gpr_replica_dump_load_string(buffer, &tmp_out);
free(token);
}
}
}
}
sprintf(tmp_out, "\t\tToken addressing mode:\n");
@ -727,7 +748,7 @@ static void orte_gpr_replica_dump_subscription(orte_buffer_t *buffer,
} /* for n */
free(tmp_out);
return;
return ORTE_SUCCESS;
}
@ -757,8 +778,7 @@ void orte_gpr_replica_dump_itagval_value(orte_buffer_t *buffer,
break;
case ORTE_PID:
sprintf(tmp, "\t\tData type: ORTE_PID\tValue: " ORTE_PID_T_PRINTF,
iptr->value.pid);
sprintf(tmp, "\t\tData type: ORTE_PID\tValue: %lu", (unsigned long)iptr->value.pid);
break;
case ORTE_INT:

Просмотреть файл

@ -126,11 +126,19 @@ int orte_gpr_replica_dump_segments_fn(orte_buffer_t *buffer, char *segment);
int orte_gpr_replica_dump_a_segment_fn(orte_buffer_t *buffer, orte_gpr_replica_segment_t *seg);
int orte_gpr_replica_dump_triggers_fn(orte_buffer_t *buffer);
int orte_gpr_replica_dump_triggers_fn(orte_buffer_t *buffer,
orte_gpr_trigger_id_t start);
int orte_gpr_replica_dump_subscriptions_fn(orte_buffer_t *buffer);
int orte_gpr_replica_dump_subscriptions_fn(orte_buffer_t *buffer,
orte_gpr_subscription_id_t start);
int orte_gpr_replica_dump_callbacks_fn(orte_buffer_t *buffer);
int orte_gpr_replica_dump_trigger(orte_buffer_t *buffer,
orte_gpr_replica_trigger_t *trig);
int orte_gpr_replica_dump_subscription(orte_buffer_t *buffer,
orte_gpr_replica_subscription_t *sub);
int orte_gpr_replica_dump_callbacks_fn(orte_buffer_t *buffer);
/*
* ********* INTERNAL UTILITY FUNCTIONS **********
@ -185,10 +193,10 @@ int orte_gpr_replica_delete_itagval(orte_gpr_replica_segment_t *seg,
*/
bool orte_gpr_replica_check_itag_list(orte_gpr_replica_addr_mode_t mode,
size_t num_itags_search,
orte_gpr_replica_itag_t *itags,
size_t num_itags_entry,
orte_gpr_replica_itag_t *entry_itags);
size_t num_itags_search,
orte_gpr_replica_itag_t *itags,
size_t num_itags_entry,
orte_gpr_replica_itag_t *entry_itags);
int orte_gpr_replica_copy_itag_list(orte_gpr_replica_itag_t **dest,
orte_gpr_replica_itag_t *src, size_t num_itags);
@ -203,9 +211,9 @@ int orte_gpr_replica_enter_local_subscription(size_t cnt, orte_gpr_subscription_
int orte_gpr_replica_enter_local_trigger(size_t cnt, orte_gpr_trigger_t **trigs);
int orte_gpr_replica_remove_local_subscription(orte_gpr_subscription_id_t id);
int orte_gpr_replica_remove_local_subscription(orte_gpr_replica_local_subscriber_t *sub);
int orte_gpr_proxy_remove_local_trigger(orte_gpr_trigger_id_t id);
int orte_gpr_replica_remove_local_trigger(orte_gpr_replica_local_trigger_t *trig);
int orte_gpr_replica_record_action(orte_gpr_replica_segment_t *seg,
orte_gpr_replica_container_t *cptr,
@ -247,18 +255,20 @@ int
orte_gpr_replica_remove_trigger(orte_process_name_t *requestor,
orte_gpr_trigger_id_t id);
int orte_gpr_replica_register_callback(orte_gpr_replica_trigger_t *trig,
orte_gpr_replica_subscription_t *sub,
int orte_gpr_replica_register_callback(orte_gpr_replica_subscription_t *sub,
orte_gpr_value_t *value);
int orte_gpr_replica_define_callback(orte_gpr_replica_callbacks_t **cbptr,
int orte_gpr_replica_register_trigger_callback(orte_gpr_replica_trigger_t *trig);
int orte_gpr_replica_define_callback(orte_gpr_notify_msg_type_t msg_type,
orte_gpr_replica_callbacks_t **cbptr,
orte_process_name_t *recipient);
int orte_gpr_replica_process_callbacks(void);
int orte_gpr_replica_purge_subscriptions(orte_process_name_t *proc);
int orte_gpr_replica_store_value_in_msg(orte_gpr_subscription_id_t id,
int orte_gpr_replica_store_value_in_msg(orte_gpr_replica_requestor_t *req,
orte_gpr_notify_message_t *msg,
size_t cnt,
orte_gpr_value_t **values);

Просмотреть файл

@ -40,7 +40,7 @@ int
orte_gpr_replica_enter_local_subscription(size_t cnt, orte_gpr_subscription_t **subscriptions)
{
orte_gpr_replica_local_subscriber_t *sub;
size_t i, id;
size_t i;
for (i=0; i < cnt; i++) {
sub = OBJ_NEW(orte_gpr_replica_local_subscriber_t);
@ -53,7 +53,7 @@ orte_gpr_replica_enter_local_subscription(size_t cnt, orte_gpr_subscription_t **
}
sub->callback = subscriptions[i]->cbfunc;
sub->user_tag = subscriptions[i]->user_tag;
if (0 > orte_pointer_array_add(&id, orte_gpr_replica_globals.local_subscriptions, sub)) {
if (0 > orte_pointer_array_add(&sub->index, orte_gpr_replica_globals.local_subscriptions, sub)) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
@ -69,10 +69,45 @@ orte_gpr_replica_enter_local_subscription(size_t cnt, orte_gpr_subscription_t **
int
orte_gpr_replica_enter_local_trigger(size_t cnt, orte_gpr_trigger_t **trigs)
{
orte_gpr_replica_local_trigger_t *trig;
size_t i, id;
orte_gpr_replica_local_trigger_t *trig, **tptr;
size_t i, j, k;
for (i=0; i < cnt; i++) {
/* If the provided trigger has a name, see if it already is on
* the local trigger list. If so, then check to see if we
* already defined a return point for it and/or if this trigger
* doesn't - in either of those two cases, we ignore the
* trigger and just use the existing entry
*/
if (NULL != trigs[i]->name) {
tptr = (orte_gpr_replica_local_trigger_t**)(orte_gpr_replica_globals.local_triggers)->addr;
for (j=0, k=0; k < orte_gpr_replica_globals.num_local_trigs &&
j < (orte_gpr_replica_globals.local_triggers)->size; j++) {
if (NULL != tptr[j]) {
k++;
if (0 == strcmp(tptr[j]->name, trigs[i]->name)) {
/* same name - trigger is already on list */
if (NULL != tptr[j]->callback || NULL == trigs[i]->cbfunc) {
/* ignore these cases */
trig = tptr[j];
goto MOVEON;
}
/* reach here only if the prior trigger didn't provide
* a callback and the new one does. In this
* case, we update the existing trigger callback and then
* move on
*/
tptr[j]->callback = trigs[i]->cbfunc;
trig = tptr[j];
goto MOVEON;
}
}
}
}
/* either the trigger doesn't have a name, OR it did, but it isn't
* already on the list - add it to the list now
*/
trig = OBJ_NEW(orte_gpr_replica_local_trigger_t);
if (NULL == trig) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
@ -94,35 +129,49 @@ orte_gpr_replica_enter_local_trigger(size_t cnt, orte_gpr_trigger_t **trigs)
}
trig->callback = trigs[i]->cbfunc;
trig->user_tag = trigs[i]->user_tag;
if (0 > orte_pointer_array_add(&id, orte_gpr_replica_globals.local_triggers, trig)) {
if (0 > orte_pointer_array_add(&trig->index, orte_gpr_replica_globals.local_triggers, trig)) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
trig->id = orte_gpr_replica_globals.num_local_trigs;
trigs[i]->id = trig->id;
(orte_gpr_replica_globals.num_local_trigs)++;
MOVEON:
trigs[i]->id = trig->id;
}
return ORTE_SUCCESS;
}
int orte_gpr_replica_remove_local_subscription(orte_gpr_subscription_id_t id)
int orte_gpr_replica_remove_local_subscription(orte_gpr_replica_local_subscriber_t *sub)
{
if (NULL != (orte_gpr_replica_globals.local_subscriptions)->addr[id]) {
OBJ_RELEASE((orte_gpr_replica_globals.local_subscriptions)->addr[id]);
orte_pointer_array_set_item(orte_gpr_replica_globals.local_subscriptions, (size_t)id, NULL);
size_t index;
if (NULL == sub) {
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
return ORTE_ERR_BAD_PARAM;
}
index = sub->index;
OBJ_RELEASE(sub);
orte_pointer_array_set_item(orte_gpr_replica_globals.local_subscriptions, index, NULL);
return ORTE_SUCCESS;
}
int orte_gpr_proxy_remove_local_trigger(orte_gpr_trigger_id_t id)
int orte_gpr_replica_remove_local_trigger(orte_gpr_replica_local_trigger_t *trig)
{
if (NULL != (orte_gpr_replica_globals.local_triggers)->addr[id]) {
OBJ_RELEASE((orte_gpr_replica_globals.local_triggers)->addr[id]);
orte_pointer_array_set_item(orte_gpr_replica_globals.local_triggers, (size_t)id, NULL);
size_t index;
if (NULL == trig) {
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
return ORTE_ERR_BAD_PARAM;
}
index = trig->index;
OBJ_RELEASE(trig);
orte_pointer_array_set_item(orte_gpr_replica_globals.local_triggers, index, NULL);
return ORTE_SUCCESS;
}

Просмотреть файл

@ -39,17 +39,21 @@
#include "mca/gpr/replica/communications/gpr_replica_comm.h"
#include "gpr_replica_fn.h"
static int orte_gpr_replica_get_callback_data(orte_gpr_value_t ***values, size_t *num_vals,
orte_gpr_replica_subscription_t *sub);
static int orte_gpr_replica_store_value_in_trigger_msg(orte_gpr_replica_subscription_t *sub,
orte_gpr_notify_message_t *msg,
size_t cnt,
orte_gpr_value_t **values);
int orte_gpr_replica_process_callbacks(void)
{
orte_gpr_replica_callbacks_t *cb;
orte_gpr_notify_data_t **data;
orte_gpr_replica_trigger_t **trigs;
orte_gpr_replica_local_trigger_t **local_trigs;
orte_gpr_replica_subscription_t **subs;
orte_gpr_replica_local_subscriber_t **local_subs;
orte_gpr_replica_requestor_t **reqs;
size_t i, j, k, m;
bool processed;
int rc;
/* check and set flag indicating callbacks being processed */
@ -59,80 +63,23 @@ int orte_gpr_replica_process_callbacks(void)
orte_gpr_replica.processing_callbacks = true;
while (NULL != (cb = (orte_gpr_replica_callbacks_t*)opal_list_remove_last(&orte_gpr_replica.callbacks))) {
/* each callback corresponds to a specific requestor
* The message in the callback consists of at least one (and can
* be more) "datagrams" intended for that requestor, each of which
* is slated to be returned to a specific function on the requestor.
*/
if (NULL == cb->requestor) { /* local callback */
/* each callback corresponds to a specific requestor
* The message in the callback consists of at least one (and can
* be more) "datagrams" intended for that requestor, each of which
* is slated to be returned to a specific
* function on the requestor.
*
* Since this requestor is "local", we simply execute
/* Since this requestor is "local", we simply execute
* the callbacks ourself.
*/
/* we first have to check the trigger id in the message. If that
* field is set to a valid value (i.e., one other than
* ORTE_GPR_TRIGGER_ID_MAX), then the message is intended to be
* sent as a single block to that trigger's callback function.
*/
if (ORTE_GPR_TRIGGER_ID_MAX > (cb->message)->id) {
/* use the local trigger callback */
local_trigs = (orte_gpr_replica_local_trigger_t**)
(orte_gpr_replica_globals.local_triggers)->addr;
for (i=0, j=0; j < orte_gpr_replica_globals.num_local_trigs &&
i < (orte_gpr_replica_globals.local_triggers)->size; i++) {
if (NULL != local_trigs[i]) {
j++;
if ((cb->message)->id == local_trigs[i]->id) {
OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex);
local_trigs[i]->callback(cb->message, local_trigs[i]->user_tag);
OPAL_THREAD_LOCK(&orte_gpr_replica_globals.mutex);
goto CLEANUP;
}
}
}
/* get here if the trigger could not be found */
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
goto CLEANUP;
}
/* get here if the trigger id indicated that this was NOT
* intended for a trigger callback - i.e., the message should
* be broken into its component parts and delivered separately
* to the indicated subscribers
*/
data = (orte_gpr_notify_data_t**)((cb->message)->data)->addr;
for (i=0; i < (cb->message)->cnt; i++) {
/* for each datagram in the message, we need to lookup
* the associated subscription id to find the correct
* callback function.
*/
local_subs = (orte_gpr_replica_local_subscriber_t**)
(orte_gpr_replica_globals.local_subscriptions)->addr;
processed = false;
for (j=0, k=0; !processed &&
k < orte_gpr_replica_globals.num_local_subs &&
j < (orte_gpr_replica_globals.local_subscriptions)->size; j++) {
if (NULL != local_subs[j]) {
k++;
if (data[i]->id == local_subs[j]->id) {
OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex);
local_subs[j]->callback(data[i], local_subs[j]->user_tag);
OPAL_THREAD_LOCK(&orte_gpr_replica_globals.mutex);
processed = true;
}
}
}
/* get here and not processed => not found */
if (!processed) {
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
}
if (ORTE_SUCCESS != (rc = orte_gpr_replica_deliver_notify_msg(cb->message))) {
ORTE_ERROR_LOG(rc);
}
} else { /* remote request - send messages back */
orte_gpr_replica_remote_notify(cb->requestor, cb->message);
}
CLEANUP:
OBJ_RELEASE(cb);
}
@ -192,16 +139,14 @@ CLEANUP:
int orte_gpr_replica_register_callback(orte_gpr_replica_trigger_t *trig,
orte_gpr_replica_subscription_t *sub,
int orte_gpr_replica_register_callback(orte_gpr_replica_subscription_t *sub,
orte_gpr_value_t *value)
{
orte_gpr_replica_callbacks_t *cb;
orte_gpr_replica_requestor_t **reqs;
size_t interim, cnt, num_tokens, num_keys;
orte_gpr_value_t **vals, **values;
orte_gpr_replica_ivalue_t **ivals;
size_t i, j, k;
orte_gpr_value_t **values;
size_t cnt;
size_t i, j;
bool cleanup_reqd;
int rc;
@ -210,75 +155,14 @@ int orte_gpr_replica_register_callback(orte_gpr_replica_trigger_t *trig,
* already been provided) so we have it ready to be added to
* the callback
*/
/* check to see if value provided - if so, we'll just use it */
if (NULL != value) {
if (NULL != value) { /* no need to get data - already provided */
values = &value;
cnt = 1;
cleanup_reqd = false;
} else {
/* value not provided - get the data off the registry. since a
* subscription can have multiple data sources specified, we
* have to loop through those sources, constructing an aggregated
* array of data values that we can work with in composing the
* final message
*/
ivals = (orte_gpr_replica_ivalue_t**)(sub->values)->addr;
cnt = 0;
values = NULL;
for (i=0, j=0; j < sub->num_values &&
i < (sub->values)->size; i++) {
if (NULL != ivals[i]) {
j++;
num_tokens = orte_value_array_get_size(&(ivals[i]->tokentags));
num_keys = orte_value_array_get_size(&(ivals[i]->keytags));
/* get the data for this description off the registry */
if (ORTE_SUCCESS != (rc = orte_gpr_replica_get_fn(ivals[i]->addr_mode,
ivals[i]->seg,
ORTE_VALUE_ARRAY_GET_BASE(&(ivals[i]->tokentags), orte_gpr_replica_itag_t),
num_tokens,
ORTE_VALUE_ARRAY_GET_BASE(&(ivals[i]->keytags), orte_gpr_replica_itag_t),
num_keys,
&interim, &vals))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* if we don't get any data back, just continue - don't
* try to add it to the values since that would cause a
* zero-byte malloc
*/
if (0 == interim) {
continue;
}
/* add these results to those we have already obtained */
if (0 == cnt) { /* first time through */
values = (orte_gpr_value_t**)malloc(interim *
sizeof(orte_gpr_value_t*));
if (NULL == values) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
} else {
/* reallocate values array */
values = (orte_gpr_value_t**)realloc(values,
(cnt+interim)*sizeof(orte_gpr_value_t*));
if (NULL == values) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
}
/* add data to end of array */
for (k=0; k < interim; k++) {
values[k+cnt] = vals[k];
}
/* release the array of pointers - the pointers themselves
* will remain "alive" in the values array to be released
* later
*/
free(vals);
/* update the count */
cnt += interim;
}
if (ORTE_SUCCESS != (rc = orte_gpr_replica_get_callback_data(&values, &cnt, sub))) {
ORTE_ERROR_LOG(rc);
return rc;
}
cleanup_reqd = true;
}
@ -300,72 +184,36 @@ int orte_gpr_replica_register_callback(orte_gpr_replica_trigger_t *trig,
* another datagram onto it to minimize communication costs.
*/
/* first, we need to determine if the data in this message
* is to be sent back through the trigger callback function
* or not. if it is, then we set the callback's message
* to point at the correct trigger id for that requestor
* so the message goes to the correct place, and we go ahead
* and store the data in the message
/* this data is intended to be sent to the individual
* subscribers themselves. Cycle through the subscription's
* requestors, define callbacks to them appropriately,
* and set the id to indicate that it does NOT go
* to a trigger
*/
if (NULL != trig && NULL != trig->master) {
/* define the callback */
if (ORTE_SUCCESS != (rc = orte_gpr_replica_define_callback(&cb, (trig->master)->requestor))) {
ORTE_ERROR_LOG(rc);
goto CLEANUP;
}
/* transfer the trigger name, if available */
if (NULL != trig->name) {
(cb->message)->name = strdup(trig->name);
}
/* set the callback id to point to the trigger callback function */
(cb->message)->id = (trig->master)->idtag;
/* cycle through all the subscription's requestors and place
* the data on the message so that the trigger master can distribute
* it as required
*/
reqs = (orte_gpr_replica_requestor_t**)(sub->requestors)->addr;
for (i=0, j=0; j < sub->num_requestors &&
i < (sub->requestors)->size; i++) {
if (NULL != reqs[i]) {
j++;
if (ORTE_SUCCESS != (rc = orte_gpr_replica_store_value_in_msg(reqs[i]->idtag,
cb->message, cnt, values))) {
ORTE_ERROR_LOG(rc);
goto CLEANUP;
}
reqs = (orte_gpr_replica_requestor_t**)(sub->requestors)->addr;
for (i=0, j=0; j < sub->num_requestors &&
i < (sub->requestors)->size; i++) {
if (NULL != reqs[i]) {
j++;
/* define the callback */
if (ORTE_SUCCESS != (rc = orte_gpr_replica_define_callback(ORTE_GPR_SUBSCRIPTION_MSG,
&cb, reqs[i]->requestor))) {
ORTE_ERROR_LOG(rc);
goto CLEANUP;
}
/* set the callback id to indicate not a trigger callback */
(cb->message)->id = ORTE_GPR_TRIGGER_ID_MAX;
/* okay, now we have a message going to the requestor. We need to
* store the values in the notify_data structure corresponding to this
* subscription id, combining data where the id's match
*/
if (ORTE_SUCCESS != (rc = orte_gpr_replica_store_value_in_msg(reqs[i],
cb->message, cnt, values))) {
ORTE_ERROR_LOG(rc);
goto CLEANUP;
}
}
} else {
/* this data is intended to be sent to the individual
* subscribers themselves. Cycle through the subscription's
* requestors, define callbacks to them appropriately,
* and set the id to indicate that it does NOT go
* to a trigger
*/
reqs = (orte_gpr_replica_requestor_t**)(sub->requestors)->addr;
for (i=0, j=0; j < sub->num_requestors &&
i < (sub->requestors)->size; i++) {
if (NULL != reqs[i]) {
j++;
/* define the callback */
if (ORTE_SUCCESS != (rc = orte_gpr_replica_define_callback(&cb, reqs[i]->requestor))) {
ORTE_ERROR_LOG(rc);
goto CLEANUP;
}
/* set the callback id to indicate not a trigger callback */
(cb->message)->id = ORTE_GPR_TRIGGER_ID_MAX;
/* okay, now we have a message going to the requestor. We need to
* store the values in the notify_data structure corresponding to this
* subscription id, combining data where the id's match
*/
if (ORTE_SUCCESS != (rc = orte_gpr_replica_store_value_in_msg(reqs[i]->idtag,
cb->message, cnt, values))) {
ORTE_ERROR_LOG(rc);
goto CLEANUP;
}
}
} /* for i */
} /* if else */
} /* for i */
CLEANUP:
/* release the values here - the value objects have been "retained" in
@ -383,7 +231,136 @@ CLEANUP:
}
int orte_gpr_replica_define_callback(orte_gpr_replica_callbacks_t **cbptr,
/*
 * Register the callback(s) generated by a fired trigger.
 *
 * A callback of type ORTE_GPR_TRIGGER_MSG is defined for the requestor of
 * the trigger's master. Its message gets the trigger's name (if any) as
 * target and the master's idtag as id. The message is then filled with:
 *   - one value per trigger counter, if the trigger's action includes
 *     ORTE_GPR_TRIG_INCLUDE_TRIG_CNTRS;
 *   - the callback data of every *named* subscription attached to the
 *     trigger.
 * Unnamed subscriptions attached to the trigger are not placed on the
 * trigger message; each of them is handed to
 * orte_gpr_replica_register_callback() instead.
 *
 * @param trig  the trigger that fired; it and its master must be non-NULL
 *
 * @return ORTE_SUCCESS, ORTE_ERR_BAD_PARAM if trig or trig->master is
 *         NULL, ORTE_ERR_OUT_OF_RESOURCE on allocation failure, or the
 *         error code returned by a helper.
 *
 * NOTE(review): if orte_gpr_replica_define_callback() hands back an
 * already-registered callback whose message carries a target string, the
 * strdup below overwrites that pointer - confirm whether that can occur.
 */
int orte_gpr_replica_register_trigger_callback(orte_gpr_replica_trigger_t *trig)
{
    orte_gpr_replica_callbacks_t *cb;
    orte_gpr_replica_counter_t **cntr;
    orte_gpr_replica_subscription_t **subs;
    orte_gpr_value_t **values, *value;
    orte_gpr_keyval_t **kvlist;
    size_t i, j, k, cnt;
    int rc;

    /* the master's requestor and idtag are needed below */
    if (NULL == trig || NULL == trig->master) {
        ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
        return ORTE_ERR_BAD_PARAM;
    }

    /* set the callback's message
     * to point at the correct trigger id for that requestor
     * so the message goes to the correct place, and go ahead
     * and store the data in the message
     */
    /* define the callback */
    if (ORTE_SUCCESS != (rc = orte_gpr_replica_define_callback(ORTE_GPR_TRIGGER_MSG,
                                        &cb, (trig->master)->requestor))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    /* transfer the trigger name, if available */
    if (NULL != trig->name) {
        (cb->message)->target = strdup(trig->name);
    }

    /* set the callback id to point to the trigger callback function */
    (cb->message)->id = (trig->master)->idtag;

    /* if the trigger counters are to be included, do so */
    if (ORTE_GPR_TRIG_INCLUDE_TRIG_CNTRS & trig->action) {
        cntr = (orte_gpr_replica_counter_t**)((trig->counters)->addr);
        for (i=0, j=0; j < trig->num_counters &&
                       i < (trig->counters)->size; i++) {
            if (NULL == cntr[i]) {
                continue;
            }
            j++;

            value = OBJ_NEW(orte_gpr_value_t);
            if (NULL == value) {
                ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
                return ORTE_ERR_OUT_OF_RESOURCE;
            }
            value->segment = strdup(cntr[i]->seg->name);

            /* build the one-entry keyval list on the side and attach it
             * to the value only once it is complete, so that the value
             * can be released on the failure paths without ever holding
             * a half-built list
             */
            kvlist = (orte_gpr_keyval_t**)malloc(sizeof(orte_gpr_keyval_t*));
            if (NULL == kvlist) {
                ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
                OBJ_RELEASE(value);
                return ORTE_ERR_OUT_OF_RESOURCE;
            }
            kvlist[0] = OBJ_NEW(orte_gpr_keyval_t);
            if (NULL == kvlist[0]) {
                ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
                free(kvlist);
                OBJ_RELEASE(value);
                return ORTE_ERR_OUT_OF_RESOURCE;
            }
            value->keyvals = kvlist;
            value->cnt = 1;

            if (ORTE_SUCCESS != (rc = orte_gpr_replica_dict_reverse_lookup(
                                        &(value->keyvals[0]->key), cntr[i]->seg,
                                        cntr[i]->iptr->itag))) {
                ORTE_ERROR_LOG(rc);
                OBJ_RELEASE(value);
                return rc;
            }
            value->keyvals[0]->type = cntr[i]->iptr->type;
            if (ORTE_SUCCESS != (rc = orte_gpr_base_xfer_payload(
                                        &(value->keyvals[0]->value),
                                        &(cntr[i]->iptr->value), cntr[i]->iptr->type))) {
                ORTE_ERROR_LOG(rc);
                OBJ_RELEASE(value);
                return rc;
            }

            /*
             * store the data in the message
             */
            rc = orte_gpr_replica_store_value_in_trigger_msg(NULL,
                                        cb->message, 1, &value);
            /* release our reference whether or not the store succeeded */
            OBJ_RELEASE(value);
            if (ORTE_SUCCESS != rc) {
                ORTE_ERROR_LOG(rc);
                return rc;
            }
        }
    }

    /* cycle through all the trigger's subscriptions and place
     * that data on the message
     */
    subs = (orte_gpr_replica_subscription_t**)(trig->subscriptions)->addr;
    for (i=0, j=0; j < trig->num_subscriptions &&
                   i < (trig->subscriptions)->size; i++) {
        if (NULL == subs[i]) {
            continue;
        }
        j++;

        if (NULL != subs[i]->name) {
            /* if it's a named subscription, we will deliver it via the
             * trigger callback function. The data to be returned will
             * be the same for all requestors.
             */
            if (ORTE_SUCCESS != (rc = orte_gpr_replica_get_callback_data(&values, &cnt, subs[i]))) {
                ORTE_ERROR_LOG(rc);
                return rc;
            }
            /*
             * store the data in the message
             */
            rc = orte_gpr_replica_store_value_in_trigger_msg(subs[i],
                                        cb->message, cnt, values);
            /* release the storage whether or not the store succeeded */
            for (k=0; k < cnt; k++) OBJ_RELEASE(values[k]);
            if (NULL != values) free(values);
            if (ORTE_SUCCESS != rc) {
                ORTE_ERROR_LOG(rc);
                return rc;
            }
        } else {
            /* in the case of a non-named subscription, we know that someone
             * has attached a subscription to this trigger, and that the
             * requestor needs the data to be returned directly to them. This
             * occurs in the case of orterun, which attaches subscriptions to
             * the standard triggers so it can monitor the progress of a job
             * it has launched. To facilitate this, we register a separate
             * callback for this subscription
             */
            if (ORTE_SUCCESS != (rc = orte_gpr_replica_register_callback(subs[i], NULL))) {
                ORTE_ERROR_LOG(rc);
                return rc;
            }
        }
    }

    return ORTE_SUCCESS;
}
int orte_gpr_replica_define_callback(orte_gpr_notify_msg_type_t msg_type,
orte_gpr_replica_callbacks_t **cbptr,
orte_process_name_t *recipient)
{
orte_gpr_replica_callbacks_t *cb;
@ -393,12 +370,19 @@ int orte_gpr_replica_define_callback(orte_gpr_replica_callbacks_t **cbptr,
for (cb = (orte_gpr_replica_callbacks_t*)opal_list_get_first(&(orte_gpr_replica.callbacks));
cb != (orte_gpr_replica_callbacks_t*)opal_list_get_end(&(orte_gpr_replica.callbacks));
cb = (orte_gpr_replica_callbacks_t*)opal_list_get_next(cb)) {
if ((NULL == recipient && NULL == cb->requestor) ||
((NULL != recipient && NULL != cb->requestor) &&
/* must check to see if both the recipient is the same AND that the
* message type being sent is identical (i.e., that messages going back
* to trigger callbacks do NOT get mixed with messages going back to
* subscription callbacks). This is critical as the deliver_notify_msg
* functions handle these message types in different ways
*/
if (((NULL == recipient && NULL == cb->requestor) &&
(msg_type == cb->message->msg_type)) ||
(((NULL != recipient && NULL != cb->requestor) &&
(0 == orte_ns.compare(ORTE_NS_CMP_ALL,
recipient,
cb->requestor)))) {
cb->requestor))) &&
(msg_type == cb->message->msg_type))) {
/* okay, a callback has been registered to send data to this
* recipient - return this location
*/
@ -424,6 +408,7 @@ int orte_gpr_replica_define_callback(orte_gpr_replica_callbacks_t **cbptr,
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
cb->message->msg_type = msg_type;
if (NULL == recipient) {
cb->requestor = NULL;
@ -441,7 +426,7 @@ int orte_gpr_replica_define_callback(orte_gpr_replica_callbacks_t **cbptr,
}
int orte_gpr_replica_store_value_in_msg(orte_gpr_subscription_id_t id,
int orte_gpr_replica_store_value_in_msg(orte_gpr_replica_requestor_t *req,
orte_gpr_notify_message_t *msg,
size_t cnt,
orte_gpr_value_t **values)
@ -459,7 +444,7 @@ int orte_gpr_replica_store_value_in_msg(orte_gpr_subscription_id_t id,
i < (msg->data)->size; i++) {
if (NULL != data[i]) {
k++;
if (data[i]->id == id) { /* going to the same place */
if (data[i]->id == req->idtag) { /* going to the same place */
for (j=0; j < cnt; j++) {
if (0 > orte_pointer_array_add(&index, data[i]->values, values[j])) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
@ -489,7 +474,7 @@ int orte_gpr_replica_store_value_in_msg(orte_gpr_subscription_id_t id,
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
dptr->id = id;
dptr->id = req->idtag;
if (0 > orte_pointer_array_add(&index, msg->data, dptr)) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
@ -514,3 +499,159 @@ int orte_gpr_replica_store_value_in_msg(orte_gpr_subscription_id_t id,
return ORTE_SUCCESS;
}
/**
 * Store a set of values on a trigger notification message.
 *
 * The message holds one notify_data entry per target, where the target is
 * the name of the subscription the data belongs to (or NULL when there is
 * no named subscription). If the message already carries an entry for the
 * same target, the values are appended to it; otherwise a new entry is
 * created and added to the message.
 *
 * Only the value POINTERS are stored, so every stored value is retained
 * with OBJ_RETAIN to keep it alive for as long as this message refers to it.
 *
 * @param sub    Subscription whose name identifies the target. May be NULL,
 *               in which case the data is stored under the NULL target.
 * @param msg    Message that receives the values.
 * @param cnt    Number of entries in @p values.
 * @param values Array of @p cnt value pointers to store.
 *
 * @retval ORTE_SUCCESS              Values were stored.
 * @retval ORTE_ERR_OUT_OF_RESOURCE  An allocation or array insertion failed.
 */
static int orte_gpr_replica_store_value_in_trigger_msg(orte_gpr_replica_subscription_t *sub,
                                    orte_gpr_notify_message_t *msg,
                                    size_t cnt,
                                    orte_gpr_value_t **values)
{
    size_t i, j, k, index;
    orte_gpr_notify_data_t **data, *dptr;
    const char *name;

    /* resolve the target name once so that neither a NULL subscription
     * nor an unnamed one is ever dereferenced or handed to strcmp/strdup
     */
    name = (NULL != sub) ? sub->name : NULL;

    /* check to see if this data is going to the same place as
     * any prior data on the message. if so, then we add the values
     * to that existing data structure. if not, then we create a new
     * data structure and store the data there
     */
    data = (orte_gpr_notify_data_t**)(msg->data)->addr;
    for (i=0, k=0; k < msg->cnt &&
                   i < (msg->data)->size; i++) {
        if (NULL == data[i]) {
            continue;
        }
        k++;
        if ((NULL == data[i]->target && NULL == name) ||
            (NULL != data[i]->target && NULL != name &&
             0 == strcmp(data[i]->target, name))) { /* going to the same place */
            for (j=0; j < cnt; j++) {
                if (0 > orte_pointer_array_add(&index, data[i]->values, values[j])) {
                    ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
                    return ORTE_ERR_OUT_OF_RESOURCE;
                }
                /* must "retain" the value object to ensure that it is
                 * there for this datagram. Since we are only storing
                 * pointers to the object (and not actually copying it),
                 * datagrams may wind up sharing the object. Hence, when
                 * a datagram is released, it will release the object. Without
                 * the retain, the next datagram that shares that object
                 * will see trash
                 */
                OBJ_RETAIN(values[j]);
            }
            data[i]->cnt += cnt;
            return ORTE_SUCCESS;
        }
    }

    /* no prior matching data found, so add another data location to
     * the message and store the values there
     */
    dptr = OBJ_NEW(orte_gpr_notify_data_t);
    if (NULL == dptr) {
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        return ORTE_ERR_OUT_OF_RESOURCE;
    }
    if (NULL != name) {
        dptr->target = strdup(name);
        if (NULL == dptr->target) {
            ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
            OBJ_RELEASE(dptr);
            return ORTE_ERR_OUT_OF_RESOURCE;
        }
    }
    if (0 > orte_pointer_array_add(&index, msg->data, dptr)) {
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        /* the message never took ownership of the new entry, so drop it here */
        OBJ_RELEASE(dptr);
        return ORTE_ERR_OUT_OF_RESOURCE;
    }
    (msg->cnt)++;

    for (j=0; j < cnt; j++) {
        if (0 > orte_pointer_array_add(&index, dptr->values, values[j])) {
            ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
            return ORTE_ERR_OUT_OF_RESOURCE;
        }
        /* retain for the same reason as above: the datagram stores a
         * shared pointer, not a copy
         */
        OBJ_RETAIN(values[j]);
    }
    dptr->cnt = cnt;

    return ORTE_SUCCESS;
}
/**
 * Collect the registry data described by a subscription.
 *
 * A subscription can specify multiple data sources. Each source is queried
 * with orte_gpr_replica_get_fn and the results are aggregated into a single
 * heap-allocated array of value pointers.
 *
 * @param ret_values Receives the aggregated array, or NULL when no data was
 *                   found or an error occurred. On success the caller owns
 *                   both the array and the values it points to.
 * @param cnt        Receives the number of entries in @p ret_values
 *                   (0 on error or when no data was found).
 * @param sub        Subscription whose value descriptions are looked up.
 *
 * @retval ORTE_SUCCESS              Data (possibly none) was collected.
 * @retval ORTE_ERR_OUT_OF_RESOURCE  The aggregate array could not be grown.
 * @return Any error code reported by orte_gpr_replica_get_fn.
 *
 * On any error return, everything accumulated so far is released so that
 * nothing is leaked.
 */
static int orte_gpr_replica_get_callback_data(orte_gpr_value_t ***ret_values, size_t *cnt,
                            orte_gpr_replica_subscription_t *sub)
{
    orte_gpr_value_t **vals, **values, **grown;
    orte_gpr_replica_ivalue_t **ivals;
    size_t i, j, k, num_tokens, num_keys, interim, count;
    int rc;

    /* setup default error returns */
    *ret_values = NULL;
    *cnt = 0;

    /* get the data off the registry. since a
     * subscription can have multiple data sources specified, we
     * have to loop through those sources, constructing an aggregated
     * array of data values that we can work with in composing the
     * final message
     */
    ivals = (orte_gpr_replica_ivalue_t**)(sub->values)->addr;
    count = 0;
    values = NULL;
    for (i=0, j=0; j < sub->num_values &&
                   i < (sub->values)->size; i++) {
        if (NULL == ivals[i]) {
            continue;
        }
        j++;
        num_tokens = orte_value_array_get_size(&(ivals[i]->tokentags));
        num_keys = orte_value_array_get_size(&(ivals[i]->keytags));

        /* get the data for this description off the registry */
        if (ORTE_SUCCESS != (rc = orte_gpr_replica_get_fn(ivals[i]->addr_mode,
                ivals[i]->seg,
                ORTE_VALUE_ARRAY_GET_BASE(&(ivals[i]->tokentags), orte_gpr_replica_itag_t),
                num_tokens,
                ORTE_VALUE_ARRAY_GET_BASE(&(ivals[i]->keytags), orte_gpr_replica_itag_t),
                num_keys,
                &interim, &vals))) {
            ORTE_ERROR_LOG(rc);
            goto CLEANUP;
        }

        /* if we don't get any data back, just continue - don't
         * try to add it to the values since that would cause a
         * zero-byte allocation
         */
        if (0 == interim) {
            continue;
        }

        /* grow the aggregate array. realloc(NULL, n) behaves like malloc(n),
         * so the first pass needs no special case. The result goes into a
         * temporary so the existing array is not lost if the call fails.
         */
        grown = (orte_gpr_value_t**)realloc(values,
                            (count+interim)*sizeof(orte_gpr_value_t*));
        if (NULL == grown) {
            ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
            /* this batch was never merged, so release it separately */
            for (k=0; k < interim; k++) {
                OBJ_RELEASE(vals[k]);
            }
            free(vals);
            rc = ORTE_ERR_OUT_OF_RESOURCE;
            goto CLEANUP;
        }
        values = grown;

        /* add data to end of array */
        for (k=0; k < interim; k++) {
            values[k+count] = vals[k];
        }

        /* release the array of pointers - the pointers themselves
         * will remain "alive" in the values array to be released
         * later
         */
        free(vals);

        /* update the count */
        count += interim;
    }

    *ret_values = values;
    *cnt = count;
    return ORTE_SUCCESS;

CLEANUP:
    /* give back everything gathered before the failure */
    for (k=0; k < count; k++) {
        OBJ_RELEASE(values[k]);
    }
    free(values);
    return rc;
}

Просмотреть файл

@ -86,11 +86,19 @@ orte_gpr_replica_register_subscription(orte_gpr_replica_subscription_t **subptr,
* Un-named subscriptions are, therefore, assumed to be specialty
* subscriptions that do not merit such consideration.
*/
/* see if another subscription is available on the system */
if (ORTE_GPR_SUBSCRIPTION_ID_MAX-1 < orte_gpr_replica.num_subs) { /* none left! */
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
sub = OBJ_NEW(orte_gpr_replica_subscription_t);
if (NULL == sub) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
sub->idtag = orte_gpr_replica.num_subs;
if (NULL != subscription->name) {
sub->name = strdup(subscription->name);
@ -107,6 +115,7 @@ orte_gpr_replica_register_subscription(orte_gpr_replica_subscription_t **subptr,
ival = OBJ_NEW(orte_gpr_replica_ivalue_t);
if (NULL == ival) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
OBJ_RELEASE(sub);
return ORTE_ERR_OUT_OF_RESOURCE;
}
@ -114,6 +123,8 @@ orte_gpr_replica_register_subscription(orte_gpr_replica_subscription_t **subptr,
if (ORTE_SUCCESS != (rc = orte_gpr_replica_find_seg(&(ival->seg), true,
subscription->values[i]->segment))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(sub);
OBJ_RELEASE(ival);
return rc;
}
tok_mode = 0x004f & subscription->values[i]->addr_mode;
@ -132,11 +143,15 @@ orte_gpr_replica_register_subscription(orte_gpr_replica_subscription_t **subptr,
if (ORTE_SUCCESS != (rc = orte_gpr_replica_get_itag_list(&tokentags, ival->seg,
subscription->values[i]->tokens, &num_tokens))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(sub);
OBJ_RELEASE(ival);
return rc;
}
if (ORTE_SUCCESS != (rc = orte_value_array_set_size(&(ival->tokentags), (size_t)num_tokens))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(sub);
OBJ_RELEASE(ival);
return rc;
}
for (j=0; j < num_tokens; j++) {
@ -152,6 +167,8 @@ orte_gpr_replica_register_subscription(orte_gpr_replica_subscription_t **subptr,
num_keys = subscription->values[i]->cnt;
if (ORTE_SUCCESS != (rc = orte_value_array_set_size(&(ival->keytags), num_keys))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(sub);
OBJ_RELEASE(ival);
return rc;
}
for (j=0; j < num_keys; j++) {
@ -159,6 +176,8 @@ orte_gpr_replica_register_subscription(orte_gpr_replica_subscription_t **subptr,
ival->seg,
subscription->values[i]->keyvals[j]->key))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(sub);
OBJ_RELEASE(ival);
return rc;
}
ORTE_VALUE_ARRAY_SET_ITEM(&(ival->keytags), orte_gpr_replica_itag_t,
@ -168,6 +187,8 @@ orte_gpr_replica_register_subscription(orte_gpr_replica_subscription_t **subptr,
/* add the object to the subscription's value pointer array */
if (0 > (rc = orte_pointer_array_add(&(ival->index), sub->values, ival))) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
OBJ_RELEASE(sub);
OBJ_RELEASE(ival);
return ORTE_ERR_OUT_OF_RESOURCE;
}
(sub->num_values)++;
@ -175,6 +196,7 @@ orte_gpr_replica_register_subscription(orte_gpr_replica_subscription_t **subptr,
/* add the object to the replica's subscriptions pointer array */
if (0 > (rc = orte_pointer_array_add(&(sub->index), orte_gpr_replica.subscriptions, sub))) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
OBJ_RELEASE(sub);
return ORTE_ERR_OUT_OF_RESOURCE;
}
(orte_gpr_replica.num_subs)++;
@ -273,11 +295,18 @@ orte_gpr_replica_register_trigger(orte_gpr_replica_trigger_t **trigptr,
* triggers that do not merit such consideration.
*/
/* see if another trigger is available */
if (ORTE_GPR_TRIGGER_ID_MAX-1 < orte_gpr_replica.num_trigs) { /* none left! */
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
trig = OBJ_NEW(orte_gpr_replica_trigger_t);
if (NULL == trig) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
trig->idtag = orte_gpr_replica.num_trigs;
/* if a name for this trigger has been provided, copy it over */
if (NULL != trigger->name) {
@ -331,7 +360,8 @@ orte_gpr_replica_register_trigger(orte_gpr_replica_trigger_t **trigptr,
goto CLEANUP;
}
if (0 == orte_gpr_replica_globals.num_srch_cptr) { /* no existing container found - create one using all the tokens */
if (0 == orte_gpr_replica_globals.num_srch_cptr) {
/* no existing container found - create one using all the tokens */
if (ORTE_SUCCESS != (rc = orte_gpr_replica_create_container(&cptr2, seg,
num_tokens, tokentags))) {
ORTE_ERROR_LOG(rc);
@ -392,7 +422,8 @@ orte_gpr_replica_register_trigger(orte_gpr_replica_trigger_t **trigptr,
0 < orte_gpr_replica_globals.num_srch_ival) {
/* this key already exists - make sure it's unique
*/
if (1 < orte_gpr_replica_globals.num_srch_ival || found) { /* not unique - error out */
if (1 < orte_gpr_replica_globals.num_srch_ival || found) {
/* not unique - error out */
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
rc = ORTE_ERR_BAD_PARAM;
goto CLEANUP;
@ -469,12 +500,23 @@ ADDREQ:
* the data
*/
if (ORTE_GPR_TRIG_ROUTE_DATA_THRU_ME & trig->action) {
if (NULL != trig->master) {
if (NULL == trig->master) {
/* someone already requested this responsibility.
* this is an error - report it
* if I'm a singleton, this is NOT an error - the
* initial "launch" has recorded the stage gate
* triggers using the [-1,-1,-1] name, so we need to
* overwrite that with my name so I get the notifications.
*/
ORTE_ERROR_LOG(ORTE_ERR_NOT_AVAILABLE);
#if 0
if (orte_process_info.singleton || orte_process_info.seed) {
opal_output(0, "Trigger master being redefined");
trig->master = req;
} else {
/* if i'm not a singleton, then this is an error - report it */
ORTE_ERROR_LOG(ORTE_ERR_NOT_AVAILABLE);
}
} else {
#endif
trig->master = req;
}
}
@ -911,34 +953,67 @@ int orte_gpr_replica_check_trig(orte_gpr_replica_trigger_t *trig)
return ORTE_SUCCESS; /* neither cmp nor at level set */
FIRED:
/* for each subscription associated with this trigger, we need to
* register a callback to the requestor that returns the specified
* data
/* if this trigger wants everything routed through a "master", then we register
* this as a trigger_callback.
*/
subs = (orte_gpr_replica_subscription_t**)(trig->subscriptions)->addr;
for (i=0, j=0; j < trig->num_subscriptions &&
i < (trig->subscriptions)->size; i++) {
if (NULL != subs[i]) {
j++;
if (ORTE_SUCCESS != (rc = orte_gpr_replica_register_callback(trig, subs[i], NULL))) {
ORTE_ERROR_LOG(rc);
return rc;
if (NULL != trig->master) {
if (ORTE_SUCCESS != (rc = orte_gpr_replica_register_trigger_callback(trig))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* for each subscription associated with this trigger, check to see if
* the subscription needs any special treatment
*/
subs = (orte_gpr_replica_subscription_t**)(trig->subscriptions)->addr;
for (i=0, j=0; j < trig->num_subscriptions &&
i < (trig->subscriptions)->size; i++) {
if (NULL != subs[i]) {
j++;
/* if ORTE_GPR_NOTIFY_STARTS_AFTER_TRIG set, set the subscription
* "active" to indicate that trigger fired
*/
if (ORTE_GPR_NOTIFY_STARTS_AFTER_TRIG & subs[i]->action) {
subs[i]->active = true;
}
/* if ORTE_GPR_NOTIFY_DELETE_AFTER_TRIG set, then set the flag
* so it can be cleaned up later
*/
if (ORTE_GPR_NOTIFY_DELETE_AFTER_TRIG & subs[i]->action) {
subs[i]->cleanup = true;
}
}
/* if ORTE_GPR_NOTIFY_STARTS_AFTER_TRIG set, set the subscription
* "active" to indicate that trigger fired
*/
if (ORTE_GPR_NOTIFY_STARTS_AFTER_TRIG & subs[i]->action) {
subs[i]->active = true;
}
/* if ORTE_GPR_NOTIFY_DELETE_AFTER_TRIG set, then set the flag
* so it can be cleaned up later
*/
if (ORTE_GPR_NOTIFY_DELETE_AFTER_TRIG & subs[i]->action) {
subs[i]->cleanup = true;
}
} else {
/* for each subscription associated with this trigger, we need to
* register a callback to the requestor that returns the specified
* data
*/
subs = (orte_gpr_replica_subscription_t**)(trig->subscriptions)->addr;
for (i=0, j=0; j < trig->num_subscriptions &&
i < (trig->subscriptions)->size; i++) {
if (NULL != subs[i]) {
j++;
if (ORTE_SUCCESS != (rc = orte_gpr_replica_register_callback(subs[i], NULL))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* if ORTE_GPR_NOTIFY_STARTS_AFTER_TRIG set, set the subscription
* "active" to indicate that trigger fired
*/
if (ORTE_GPR_NOTIFY_STARTS_AFTER_TRIG & subs[i]->action) {
subs[i]->active = true;
}
/* if ORTE_GPR_NOTIFY_DELETE_AFTER_TRIG set, then set the flag
* so it can be cleaned up later
*/
if (ORTE_GPR_NOTIFY_DELETE_AFTER_TRIG & subs[i]->action) {
subs[i]->cleanup = true;
}
}
}
}
/* set the processing flag so we don't go into infinite loop if
* any callback functions modify the registry
*/
@ -1057,7 +1132,7 @@ int orte_gpr_replica_check_subscription(orte_gpr_replica_subscription_t *sub)
goto CLEANUP;
}
if (ORTE_SUCCESS != (rc =
orte_gpr_replica_register_callback(NULL, sub, value))) {
orte_gpr_replica_register_callback(sub, value))) {
ORTE_ERROR_LOG(rc);
goto CLEANUP;
}

Просмотреть файл

@ -77,6 +77,7 @@ typedef uint8_t orte_gpr_replica_action_t;
typedef struct {
opal_object_t super; /**< Allows this to be an object */
orte_gpr_subscription_id_t id; /**< id of this subscription */
size_t index; /**< location of this subscription in array */
char *name;
orte_gpr_notify_cb_fn_t callback; /**< Function to be called for notificaiton */
void *user_tag; /**< User-provided tag for callback function */
@ -92,6 +93,7 @@ OBJ_CLASS_DECLARATION(orte_gpr_replica_local_subscriber_t);
typedef struct {
opal_object_t super; /**< Allows this to be an object */
orte_gpr_trigger_id_t id; /**< id of this trigger */
size_t index; /**< location of this trigger in array */
char *name;
orte_gpr_trigger_cb_fn_t callback; /**< Function to be called for notification */
void *user_tag; /**< User-provided tag for callback function */
@ -148,9 +150,9 @@ struct orte_gpr_replica_t {
orte_pointer_array_t *segments; /**< Managed array of pointers to segment objects */
size_t num_segs;
orte_pointer_array_t *triggers; /**< Managed array of pointers to triggers */
size_t num_trigs;
orte_gpr_trigger_id_t num_trigs;
orte_pointer_array_t *subscriptions; /**< Managed array of pointers to subscriptions */
size_t num_subs;
orte_gpr_subscription_id_t num_subs;
bool processing_callbacks;
opal_list_t callbacks; /**< List of callbacks to be processed */
};
@ -260,17 +262,18 @@ typedef struct {
orte_process_name_t *requestor;
/* idtag associated with this subscription */
orte_gpr_subscription_id_t idtag;
/* for a local subscription, where this block of data goes */
orte_gpr_notify_cb_fn_t callback; /**< Function to be called for notification */
void *user_tag; /**< User-provided tag for callback function */
} orte_gpr_replica_requestor_t;
OBJ_CLASS_DECLARATION(orte_gpr_replica_requestor_t);
typedef struct {
opal_object_t super; /**< Makes this an object */
/* index of this entry in subscription array - corresponds to local idtag */
opal_object_t super; /**< Makes this an object */
/* index of this entry in subscription array */
size_t index;
/* idtag for the subscription - may be different than index since
* the data type can be different than size_t
*/
orte_gpr_subscription_id_t idtag;
/* name of this subscription, if provided */
char *name;
/* boolean indicating if this subscription is active or not */
@ -323,8 +326,10 @@ struct orte_gpr_replica_trigger_t {
opal_object_t super; /**< Make this an object */
/* name of this trigger, if provided */
char *name;
/* index of this trigger in the triggers array - corresponds to local idtag */
/* index of this trigger in the triggers array */
size_t index;
/* trigger id on the local system */
orte_gpr_trigger_id_t idtag;
/* array of requestors that have "attached" themselves to this trigger */
size_t num_attached;
orte_pointer_array_t *attached;

Просмотреть файл

@ -37,6 +37,8 @@
static void orte_gpr_replica_local_subscriber_constructor(orte_gpr_replica_local_subscriber_t *ptr)
{
ptr->name = NULL;
ptr->callback = NULL;
ptr->user_tag = NULL;
}
static void orte_gpr_replica_local_subscriber_destructor(orte_gpr_replica_local_subscriber_t *ptr)
@ -55,6 +57,8 @@ OBJ_CLASS_INSTANCE(
static void orte_gpr_replica_local_trigger_constructor(orte_gpr_replica_local_trigger_t *ptr)
{
ptr->name = NULL;
ptr->callback = NULL;
ptr->user_tag = NULL;
}
static void orte_gpr_replica_local_trigger_destructor(orte_gpr_replica_local_trigger_t *ptr)
@ -303,6 +307,7 @@ OBJ_CLASS_INSTANCE(
static void orte_gpr_replica_subscription_construct(orte_gpr_replica_subscription_t* sub)
{
sub->index = 0;
sub->idtag = ORTE_GPR_SUBSCRIPTION_ID_MAX;
sub->name = NULL;
sub->active = false;
sub->processing = false;
@ -367,6 +372,7 @@ OBJ_CLASS_INSTANCE(
static void orte_gpr_replica_trigger_requestor_construct(orte_gpr_replica_trigger_requestor_t* ptr)
{
ptr->index = 0;
ptr->idtag = ORTE_GPR_TRIGGER_ID_MAX;
ptr->requestor = NULL;
ptr->idtag = 0;
}
@ -391,6 +397,7 @@ static void orte_gpr_replica_trigger_construct(orte_gpr_replica_trigger_t* trig)
{
trig->name = NULL;
trig->index = 0;
trig->idtag = ORTE_GPR_TRIGGER_ID_MAX;
trig->num_attached = 0;
orte_pointer_array_init(&(trig->attached), orte_gpr_array_block_size,

Просмотреть файл

@ -43,17 +43,17 @@
*/
OMPI_COMP_EXPORT mca_gpr_base_component_t mca_gpr_replica_component = {
{
MCA_GPR_BASE_VERSION_1_0_0,
MCA_GPR_BASE_VERSION_1_0_0,
"replica", /* MCA module name */
ORTE_MAJOR_VERSION, /* MCA module major version */
ORTE_MINOR_VERSION, /* MCA module minor version */
ORTE_RELEASE_VERSION, /* MCA module release version */
orte_gpr_replica_open, /* module open */
orte_gpr_replica_close /* module close */
"replica", /* MCA module name */
ORTE_MAJOR_VERSION, /* MCA module major version */
ORTE_MINOR_VERSION, /* MCA module minor version */
ORTE_RELEASE_VERSION, /* MCA module release version */
orte_gpr_replica_open, /* module open */
orte_gpr_replica_close /* module close */
},
{
false /* checkpoint / restart */
false /* checkpoint / restart */
},
orte_gpr_replica_init, /* module init */
orte_gpr_replica_finalize /* module shutdown */
@ -82,6 +82,7 @@ static orte_gpr_base_module_t orte_gpr_replica_module = {
/* GENERAL OPERATIONS */
orte_gpr_replica_preallocate_segment,
orte_gpr_base_xfer_payload,
orte_gpr_replica_deliver_notify_msg,
/* ARITHMETIC OPERATIONS */
orte_gpr_replica_increment_value,
orte_gpr_replica_decrement_value,
@ -90,6 +91,7 @@ static orte_gpr_base_module_t orte_gpr_replica_module = {
orte_gpr_base_subscribe_1,
orte_gpr_base_subscribe_N,
orte_gpr_base_define_trigger,
orte_gpr_base_define_trigger_level,
orte_gpr_replica_unsubscribe,
orte_gpr_replica_cancel_trigger,
/* COMPOUND COMMANDS */
@ -101,6 +103,8 @@ static orte_gpr_base_module_t orte_gpr_replica_module = {
orte_gpr_replica_dump_segments,
orte_gpr_replica_dump_triggers,
orte_gpr_replica_dump_subscriptions,
orte_gpr_replica_dump_a_trigger,
orte_gpr_replica_dump_a_subscription,
orte_gpr_replica_dump_local_triggers,
orte_gpr_replica_dump_local_subscriptions,
orte_gpr_replica_dump_callbacks,
@ -168,22 +172,22 @@ orte_gpr_base_module_t *orte_gpr_replica_init(bool *allow_multi_user_threads, bo
if (NULL == orte_process_info.gpr_replica_uri) {
/* Return a module (choose an arbitrary, positive priority --
it's only relevant compared to other ns components). If
we're not the seed, then we don't want to be selected, so
return NULL. */
/* Return a module (choose an arbitrary, positive priority --
it's only relevant compared to other ns components). If
we're not the seed, then we don't want to be selected, so
return NULL. */
*priority = 50;
*priority = 50;
/* We allow multi user threads but don't have any hidden threads */
/* We allow multi user threads but don't have any hidden threads */
*allow_multi_user_threads = true;
*have_hidden_threads = false;
*allow_multi_user_threads = true;
*have_hidden_threads = false;
/* setup the thread locks and condition variables */
OBJ_CONSTRUCT(&orte_gpr_replica_globals.mutex, opal_mutex_t);
/* setup the thread locks and condition variables */
OBJ_CONSTRUCT(&orte_gpr_replica_globals.mutex, opal_mutex_t);
/* initialize the registry head */
/* initialize the registry head */
if (ORTE_SUCCESS != (rc = orte_pointer_array_init(&(orte_gpr_replica.segments),
orte_gpr_array_block_size,
orte_gpr_array_max_size,
@ -211,8 +215,8 @@ orte_gpr_base_module_t *orte_gpr_replica_init(bool *allow_multi_user_threads, bo
}
orte_gpr_replica.num_subs = 0;
/* initialize the callback list head */
OBJ_CONSTRUCT(&orte_gpr_replica.callbacks, opal_list_t);
/* initialize the callback list head */
OBJ_CONSTRUCT(&orte_gpr_replica.callbacks, opal_list_t);
orte_gpr_replica.processing_callbacks = false;
/* initialize the local subscription and trigger trackers */
@ -280,12 +284,12 @@ orte_gpr_base_module_t *orte_gpr_replica_init(bool *allow_multi_user_threads, bo
opal_output(0, "nb receive setup");
}
/* Return the module */
/* Return the module */
initialized = true;
return &orte_gpr_replica_module;
} else {
return NULL;
return NULL;
}
}
@ -315,7 +319,7 @@ int orte_gpr_replica_finalize(void)
orte_gpr_replica_callbacks_t* cb;
if (orte_gpr_replica_globals.debug) {
opal_output(0, "finalizing gpr replica");
opal_output(0, "finalizing gpr replica");
}
seg = (orte_gpr_replica_segment_t**)(orte_gpr_replica.segments)->addr;
@ -367,6 +371,6 @@ int orte_gpr_replica_finalize(void)
return ORTE_SUCCESS;
}
orte_rml.recv_cancel(ORTE_RML_NAME_ANY, ORTE_RML_TAG_GPR);
orte_rml.recv_cancel(ORTE_RML_NAME_ANY, ORTE_RML_TAG_GPR);
return ORTE_SUCCESS;
}

Просмотреть файл

@ -77,8 +77,8 @@ int orte_ns_replica_create_cellid(orte_cellid_t *cellid, char *site, char *resou
*cellid = new_cell->cell;
(orte_ns_replica.num_cells)++;
OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex);
return ORTE_SUCCESS;
OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex);
return ORTE_SUCCESS;
}
int orte_ns_replica_get_cell_info(orte_cellid_t cellid,
@ -235,6 +235,7 @@ PROCESS:
nptr->cellid = 0;
nptr->jobid = job;
nptr->vpid = (orte_vpid_t)k;
nptr++;
}
*num_procs = (size_t)ptr[j]->next_vpid;

Просмотреть файл

@ -26,6 +26,7 @@
#include "dps/dps_types.h"
#include "mca/mca.h"
#include "mca/ns/ns_types.h"
#include "mca/gpr/gpr_types.h"
#include "mca/oob/oob_types.h"
#ifdef HAVE_SYS_UIO_H
@ -406,7 +407,7 @@ OMPI_DECLSPEC int mca_oob_xcast(
orte_process_name_t* peers,
size_t num_peers,
orte_buffer_t* buffer,
mca_oob_callback_packed_fn_t cbfunc);
orte_gpr_trigger_cb_fn_t cbfunc);
#if defined(c_plusplus) || defined(__cplusplus)
}

Просмотреть файл

@ -20,6 +20,7 @@
#include "include/constants.h"
#include "opal/util/output.h"
#include "util/proc_info.h"
#include "orte/dps/dps.h"
#include "mca/oob/oob.h"
#include "mca/oob/base/base.h"
#include "mca/ns/ns.h"
@ -46,7 +47,7 @@ int mca_oob_xcast(
orte_process_name_t* peers,
size_t num_peers,
orte_buffer_t* buffer,
mca_oob_callback_packed_fn_t cbfunc)
orte_gpr_trigger_cb_fn_t cbfunc)
{
size_t i;
int rc;
@ -74,14 +75,29 @@ int mca_oob_xcast(
}
} else {
orte_buffer_t rbuf;
orte_gpr_notify_message_t *msg;
OBJ_CONSTRUCT(&rbuf, orte_buffer_t);
rc = mca_oob_recv_packed(MCA_OOB_NAME_ANY, &rbuf, tag);
if(rc < 0) {
OBJ_DESTRUCT(&rbuf);
return rc;
}
if(cbfunc != NULL)
cbfunc(rc, root, &rbuf, tag, NULL);
if (cbfunc != NULL) {
msg = OBJ_NEW(orte_gpr_notify_message_t);
if (NULL == msg) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
i=1;
if (ORTE_SUCCESS != (rc = orte_dps.unpack(&rbuf, &msg, &i, ORTE_GPR_NOTIFY_MSG))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(msg);
return rc;
}
cbfunc(msg);
OBJ_RELEASE(msg);
}
OBJ_DESTRUCT(&rbuf);
}
return ORTE_SUCCESS;

Просмотреть файл

@ -194,7 +194,7 @@ typedef int (*mca_oob_base_module_xcast_fn_t)(orte_process_name_t* root,
orte_process_name_t* peers,
size_t num_peers,
orte_buffer_t* buffer,
mca_oob_callback_packed_fn_t cbfunc);
orte_gpr_trigger_cb_fn_t cbfunc);
/**
* OOB Module

Просмотреть файл

@ -616,8 +616,9 @@ int mca_oob_tcp_resolve(mca_oob_tcp_peer_t* peer)
{
mca_oob_tcp_addr_t* addr;
mca_oob_tcp_subscription_t* subscription;
orte_gpr_trigger_t trig, *trigs;
orte_gpr_subscription_t sub, *subs;
char *segment, *sub_name, *trig_name;
char *key="oob-tcp";
orte_gpr_subscription_id_t sub_id;
opal_list_item_t* item;
int rc;
@ -642,111 +643,55 @@ int mca_oob_tcp_resolve(mca_oob_tcp_peer_t* peer)
}
}
OBJ_CONSTRUCT(&sub, orte_gpr_subscription_t);
/* indicate that this is a standard subscription. This indicates that the
* subscription will be common to all processes. Thus, the resulting data
* can be consolidated into a process-independent message and broadcast
* to all processes
*/
if (ORTE_SUCCESS != (rc = orte_schema.get_std_subscription_name(&(sub.name),
if (ORTE_SUCCESS != (rc = orte_schema.get_std_subscription_name(&sub_name,
OMPI_OOB_SUBSCRIPTION, peer->peer_name.jobid))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* send data when trigger fires, continue to monitor. The default
* action for any subscription that includes a trigger condition is
* to send the specified data when the trigger fires. This set of flags
* indicates that - AFTER the trigger fires - the subscription should
* continue to send data any time an entry is added or changed.
*/
sub.action = ORTE_GPR_NOTIFY_ADD_ENTRY |
ORTE_GPR_NOTIFY_VALUE_CHG |
ORTE_GPR_NOTIFY_STARTS_AFTER_TRIG;
/* setup the value structures that describe the data to
* be monitored and returned by this subscription
*/
sub.cnt = 1;
sub.values = (orte_gpr_value_t**)malloc(sizeof(orte_gpr_value_t*));
if (NULL == sub.values) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
sub.values[0] = OBJ_NEW(orte_gpr_value_t);
if (NULL == sub.values[0]) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
sub.cnt = 1;
/* define the segment */
if (ORTE_SUCCESS != (rc = orte_schema.get_job_segment_name(
&(sub.values[0]->segment),
peer->peer_name.jobid))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&sub);
return rc;
}
sub.values[0]->addr_mode = ORTE_GPR_KEYS_OR | ORTE_GPR_TOKENS_OR;
/* look at all containers on this segment */
sub.values[0]->tokens = NULL;
sub.values[0]->num_tokens = 0;
/* look for any keyval with "modex" key */
sub.values[0]->cnt = 1;
sub.values[0]->keyvals = (orte_gpr_keyval_t**)malloc(sizeof(orte_gpr_keyval_t*));
if (NULL == sub.values[0]->keyvals) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
OBJ_DESTRUCT(&sub);
return ORTE_ERR_OUT_OF_RESOURCE;
}
sub.values[0]->keyvals[0] = OBJ_NEW(orte_gpr_keyval_t);
if (NULL == sub.values[0]->keyvals[0]) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
OBJ_DESTRUCT(&sub);
return ORTE_ERR_OUT_OF_RESOURCE;
}
sub.values[0]->keyvals[0]->key = strdup("oob-tcp");
if (NULL == sub.values[0]->keyvals[0]->key) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
OBJ_DESTRUCT(&sub);
return ORTE_ERR_OUT_OF_RESOURCE;
}
/* define the callback function */
sub.cbfunc = mca_oob_tcp_registry_callback;
sub.user_tag = NULL;
/* setup the trigger value */
OBJ_CONSTRUCT(&trig, orte_gpr_trigger_t);
if (ORTE_SUCCESS != (rc = orte_schema.get_std_trigger_name(&(trig.name),
/* attach to the stage-1 standard trigger */
if (ORTE_SUCCESS != (rc = orte_schema.get_std_trigger_name(&trig_name,
ORTE_STG1_TRIGGER, peer->peer_name.jobid))) {
ORTE_ERROR_LOG(rc);
free(sub_name);
return rc;
}
/* this is an ORTE-standard trigger that is defined by the ORTE resource manager
* when the job was launched - therefore, we don't need to provide any additional
* info
*/
/* define the segment */
if (ORTE_SUCCESS != (rc = orte_schema.get_job_segment_name(&segment,
peer->peer_name.jobid))) {
ORTE_ERROR_LOG(rc);
free(sub_name);
free(trig_name);
return rc;
}
if (ORTE_SUCCESS != (rc = orte_gpr.subscribe_1(&sub_id, trig_name, sub_name,
ORTE_GPR_NOTIFY_ADD_ENTRY |
ORTE_GPR_NOTIFY_VALUE_CHG |
ORTE_GPR_NOTIFY_STARTS_AFTER_TRIG,
ORTE_GPR_KEYS_OR | ORTE_GPR_TOKENS_OR,
segment,
NULL, /* look at all containers on this segment */
key,
mca_oob_tcp_registry_callback, NULL))) {
ORTE_ERROR_LOG(rc);
free(sub_name);
free(trig_name);
free(segment);
return rc;
}
trigs = &trig;
subs = &sub;
subscription = OBJ_NEW(mca_oob_tcp_subscription_t);
subscription->jobid = peer->peer_name.jobid;
rc = orte_gpr.subscribe(1, &subs, 1, &trigs);
if(rc != OMPI_SUCCESS) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&sub);
OBJ_DESTRUCT(&trig);
return rc;
}
/* the id of each subscription is stored by the system in the corresponding
* subscription object we passed into orte_gpr.subscribe. We record it
/* the id of each subscription is recorded
* here so we can (if desired) cancel that subscription later
*/
subscription->subid = sub.id;
subscription->subid = sub_id;
/* done with these, so release any memory */
OBJ_DESTRUCT(&sub);
OBJ_DESTRUCT(&trig);
free(trig_name);
free(sub_name);
free(segment);
opal_list_append(&mca_oob_tcp_component.tcp_subscriptions, &subscription->item);
OPAL_THREAD_UNLOCK(&mca_oob_tcp_component.tcp_lock);
@ -761,13 +706,16 @@ int mca_oob_tcp_init(void)
{
orte_jobid_t jobid;
orte_buffer_t *buffer;
orte_gpr_trigger_t trig, *trigs;
orte_gpr_value_t *value;
orte_gpr_subscription_id_t sub_id;
char *sub_name, *segment, *trig_name, **tokens;
char *keys[] = {"oob-tcp", ORTE_PROC_RML_IP_ADDRESS_KEY};
orte_data_type_t types[2];
orte_gpr_value_union_t values[2];
mca_oob_tcp_subscription_t *subscription;
orte_gpr_subscription_t sub, *subs;
int rc;
opal_list_item_t* item;
char *tmp, *tmp2, *tmp3;
size_t num_tokens;
/* random delay to stagger connections back to seed */
#if defined(WIN32)
@ -807,173 +755,76 @@ int mca_oob_tcp_init(void)
ORTE_NAME_ARGS(orte_process_info.my_name));
}
/* setup the subscription description value */
OBJ_CONSTRUCT(&sub, orte_gpr_subscription_t);
/* indicate that this is a standard subscription. This indicates that the
* subscription will be common to all processes. Thus, the resulting data
* can be consolidated into a process-independent message and broadcast
* to all processes
*/
if (ORTE_SUCCESS != (rc = orte_schema.get_std_subscription_name(&(sub.name),
if (ORTE_SUCCESS != (rc = orte_schema.get_std_subscription_name(&sub_name,
OMPI_OOB_SUBSCRIPTION, jobid))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* send data when trigger fires, continue to monitor. The default
* action for any subscription that includes a trigger condition is
* to send the specified data when the trigger fires. This set of flags
* indicates that - AFTER the trigger fires - the subscription should
* continue to send data any time an entry is added or changed.
*/
sub.action = ORTE_GPR_NOTIFY_ADD_ENTRY |
ORTE_GPR_NOTIFY_VALUE_CHG |
ORTE_GPR_NOTIFY_STARTS_AFTER_TRIG;
/* setup the value structures that describe the data to
* be monitored and returned by this subscription
*/
sub.cnt = 1;
sub.values = (orte_gpr_value_t**)malloc(sizeof(orte_gpr_value_t*));
if (NULL == sub.values) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
sub.values[0] = OBJ_NEW(orte_gpr_value_t);
if (NULL == sub.values[0]) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
/* attach to the stage-1 standard trigger */
if (ORTE_SUCCESS != (rc = orte_schema.get_std_trigger_name(&trig_name,
ORTE_STG1_TRIGGER, jobid))) {
ORTE_ERROR_LOG(rc);
free(sub_name);
return rc;
}
/* define the segment */
if (ORTE_SUCCESS != (rc = orte_schema.get_job_segment_name(
&(sub.values[0]->segment),
jobid))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&sub);
return rc;
}
sub.values[0]->addr_mode = ORTE_GPR_KEYS_OR | ORTE_GPR_TOKENS_OR;
/* look at all containers on this segment */
sub.values[0]->tokens = NULL;
sub.values[0]->num_tokens = 0;
/* look for any keyval with "oob-tcp" key */
sub.values[0]->cnt = 1;
sub.values[0]->keyvals = (orte_gpr_keyval_t**)malloc(sizeof(orte_gpr_keyval_t*));
if (NULL == sub.values[0]->keyvals) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
OBJ_DESTRUCT(&sub);
return ORTE_ERR_OUT_OF_RESOURCE;
}
sub.values[0]->keyvals[0] = OBJ_NEW(orte_gpr_keyval_t);
if (NULL == sub.values[0]->keyvals[0]) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
OBJ_DESTRUCT(&sub);
return ORTE_ERR_OUT_OF_RESOURCE;
}
sub.values[0]->keyvals[0]->key = strdup("oob-tcp");
if (NULL == sub.values[0]->keyvals[0]->key) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
OBJ_DESTRUCT(&sub);
return ORTE_ERR_OUT_OF_RESOURCE;
}
/* define the callback function */
sub.cbfunc = mca_oob_tcp_registry_callback;
sub.user_tag = NULL;
/* setup the trigger value */
OBJ_CONSTRUCT(&trig, orte_gpr_trigger_t);
if (ORTE_SUCCESS != (rc = orte_schema.get_std_trigger_name(&(trig.name),
ORTE_STG1_TRIGGER, jobid))) {
if (ORTE_SUCCESS != (rc = orte_schema.get_job_segment_name(&segment, jobid))) {
ORTE_ERROR_LOG(rc);
free(sub_name);
free(trig_name);
return rc;
}
/* this is an ORTE-standard trigger that was defined by the ORTE resource manager
* when the job was launched - therefore, we don't need to provide any additional
* info
*/
trigs = &trig;
subs = &sub;
subscription = OBJ_NEW(mca_oob_tcp_subscription_t);
subscription->jobid = jobid;
rc = orte_gpr.subscribe(1, &subs, 1, &trigs);
if(rc != OMPI_SUCCESS) {
if (ORTE_SUCCESS != (rc = orte_gpr.subscribe_1(&sub_id, trig_name, sub_name,
ORTE_GPR_NOTIFY_ADD_ENTRY |
ORTE_GPR_NOTIFY_VALUE_CHG |
ORTE_GPR_NOTIFY_STARTS_AFTER_TRIG,
ORTE_GPR_KEYS_OR | ORTE_GPR_TOKENS_OR,
segment,
NULL, /* look at all containers on this segment */
keys[0],
mca_oob_tcp_registry_callback, NULL))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&sub);
OBJ_DESTRUCT(&trig);
free(sub_name);
free(trig_name);
free(segment);
return rc;
}
/* the id of each subscription is stored by the system in the corresponding
* subscription object we passed into orte_gpr.subscribe. We record it
/* the id of each subscription is recorded
* here so we can (if desired) cancel that subscription later
*/
subscription->subid = sub.id;
subscription->subid = sub_id;
/* done with these, so release any memory */
OBJ_DESTRUCT(&sub);
OBJ_DESTRUCT(&trig);
free(trig_name);
free(sub_name);
/* now setup to put our contact info on registry */
buffer = OBJ_NEW(orte_buffer_t);
if(buffer == NULL) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return OMPI_ERR_OUT_OF_RESOURCE;
return ORTE_ERR_OUT_OF_RESOURCE;
}
rc = mca_oob_tcp_addr_pack(buffer);
if(rc != OMPI_SUCCESS) {
if (ORTE_SUCCESS != (rc = mca_oob_tcp_addr_pack(buffer))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(buffer);
return rc;
}
/* put our contact info in registry */
value = OBJ_NEW(orte_gpr_value_t);
if (NULL == value) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
value->addr_mode = ORTE_GPR_OVERWRITE | ORTE_GPR_TOKENS_XAND;
if (ORTE_SUCCESS != (rc = orte_schema.get_job_segment_name(&(value->segment), jobid))) {
/* extract payload for storage */
types[0] = ORTE_BYTE_OBJECT;
if (ORTE_SUCCESS != (rc = orte_dps.unload(buffer, (void**)&(values[0].byteobject.bytes),
&(values[0].byteobject.size)))) {
ORTE_ERROR_LOG(rc);
return rc;
}
value->cnt = 2;
value->keyvals = (orte_gpr_keyval_t**)malloc(value->cnt * sizeof(orte_gpr_keyval_t*));
if(NULL == value->keyvals) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
value->keyvals[0] = OBJ_NEW(orte_gpr_keyval_t);
if (NULL == value->keyvals[0]) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
value->keyvals[1] = OBJ_NEW(orte_gpr_keyval_t);
if (NULL == value->keyvals[1]) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
if (ORTE_SUCCESS != (rc = orte_schema.get_proc_tokens(&(value->tokens),
&(value->num_tokens), orte_process_info.my_name))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(value);
return rc;
}
(value->keyvals[0])->type = ORTE_BYTE_OBJECT;
(value->keyvals[0])->key = strdup("oob-tcp");
rc = orte_dps.unload(buffer, (void**)&(value->keyvals[0])->value.byteobject.bytes,
&(value->keyvals[0])->value.byteobject.size);
if(rc != ORTE_SUCCESS) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(value);
free(segment);
OBJ_RELEASE(buffer);
return rc;
}
OBJ_RELEASE(buffer);
(value->keyvals[1])->type = ORTE_STRING;
(value->keyvals[1])->key = strdup(ORTE_PROC_RML_IP_ADDRESS_KEY);
/* setup the IP address for storage */
tmp = mca_oob.oob_get_addr();
tmp2 = strrchr(tmp, '/') + 1;
tmp3 = strrchr(tmp, ':');
@ -982,33 +833,37 @@ int mca_oob_tcp_init(void)
"returned for selected oob interfaces.\n",
ORTE_NAME_ARGS(orte_process_info.my_name), tmp);
ORTE_ERROR_LOG(ORTE_ERROR);
free(segment);
free(tmp);
free(values[0].byteobject.bytes);
return ORTE_ERROR;
}
*tmp3 = '\0';
(value->keyvals[1])->value.strptr = strdup(tmp2);
types[1] = ORTE_STRING;
values[1].strptr = strdup(tmp2);
free(tmp);
if(mca_oob_tcp_component.tcp_debug > 2) {
opal_output(0, "[%lu,%lu,%lu] mca_oob_tcp_init: calling orte_gpr.put(%s)\n",
ORTE_NAME_ARGS(orte_process_info.my_name),
value->segment);
}
rc = orte_gpr.put(1, &value);
if(rc != OMPI_SUCCESS) {
/* get the process tokens */
if (ORTE_SUCCESS != (rc = orte_schema.get_proc_tokens(&tokens, &num_tokens,
orte_process_info.my_name))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(value);
OBJ_RELEASE(buffer);
free(segment);
free(values[0].byteobject.bytes);
free(values[1].strptr);
return rc;
}
OBJ_RELEASE(buffer);
OBJ_RELEASE(value);
if(rc != ORTE_SUCCESS) {
/* put our contact info in registry */
if (ORTE_SUCCESS != (rc = orte_gpr.put_N(ORTE_GPR_OVERWRITE | ORTE_GPR_TOKENS_XAND,
segment, tokens, 2, keys, types, values))) {
ORTE_ERROR_LOG(rc);
return rc;
}
return OMPI_SUCCESS;
free(segment);
free(values[0].byteobject.bytes);
free(values[1].strptr);
return rc;
}
/*

Просмотреть файл

@ -115,12 +115,10 @@ int orte_rmgr_base_terminate_job_not_available(orte_jobid_t);
int orte_rmgr_base_terminate_proc_not_available(const orte_process_name_t*);
int orte_rmgr_base_proc_stage_gate_init(orte_jobid_t job);
int orte_rmgr_base_proc_stage_gate_subscribe(orte_jobid_t job, orte_gpr_notify_cb_fn_t, void*);
void orte_rmgr_base_proc_stage_gate_mgr(
orte_gpr_notify_data_t *data,
void *user_tag);
void orte_rmgr_base_proc_stage_gate_mgr_abort(
orte_gpr_notify_data_t *data,
void *user_tag);
int orte_rmgr_base_proc_stage_gate_mgr(
orte_gpr_notify_message_t *msg);
int orte_rmgr_base_proc_stage_gate_mgr_abort(
orte_gpr_notify_message_t *msg);
int orte_rmgr_base_spawn_not_available(
orte_app_context_t** app_context,
size_t num_context,

Просмотреть файл

@ -30,6 +30,7 @@
#include "orte/dps/dps.h"
#include "orte/mca/gpr/gpr.h"
#include "orte/mca/ns/ns.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/rml/rml.h"
#include "orte/mca/soh/soh.h"
@ -41,9 +42,7 @@ int orte_rmgr_base_proc_stage_gate_init(orte_jobid_t job)
{
size_t i, num_counters=6, num_named_trigs=5;
int rc;
orte_gpr_value_t *values, value, trigvalue, *trigvals;
orte_gpr_trigger_t trig, *trigs;
orte_gpr_subscription_t sub, *subs;
orte_gpr_value_t *values, value;
char* keys[] = {
/* changes to this ordering need to be reflected in code below */
ORTE_PROC_NUM_AT_STG1,
@ -61,6 +60,9 @@ int orte_rmgr_base_proc_stage_gate_init(orte_jobid_t job)
ORTE_NUM_FINALIZED_TRIGGER,
ORTE_NUM_TERMINATED_TRIGGER
};
char *segment, *trig_name, *tokens[2], *trig_keys[2];
orte_gpr_trigger_id_t id;
size_t trig_level;
/* setup the counters */
OBJ_CONSTRUCT(&value, orte_gpr_value_t);
@ -106,487 +108,172 @@ int orte_rmgr_base_proc_stage_gate_init(orte_jobid_t job)
}
OBJ_DESTRUCT(&value);
/* for the stage gate triggers, we want the counter values returned to us AND
* information on VPID_START so we can generate the list of peers
* to receive the xcast messages for barrier release.
/*** DEFINE STAGE GATE STANDARD TRIGGERS ***/
/* The standard triggers will return the trigger counters so that we
* can get required information for notifying processes. Other
* subscriptions will then attach to them.
*/
/*** SUBSCRIPTIONS ***/
/* the subscription object is used to define the values we want
* returned to us. we'll enter the precise data
* keys when we are ready to register the subscription - for now,
* do all the basic stuff
*/
OBJ_CONSTRUCT(&sub, orte_gpr_subscription_t);
/* we do not name the subscription - see explanation below. also, we do
* not assign the subscription id here - it is assigned for us when the
* registry "registers" the subscription and is returned in the
* subscription object at that time
*/
/*
* set the action to delete the subscription after the trigger fires. this
* subscription is solely for the purpose of returning stagegate information
* to the resource manager - we don't need it after that happens
*/
sub.action = ORTE_GPR_NOTIFY_DELETE_AFTER_TRIG;
/*
* setup the value object to define the data to be returned to us
*/
OBJ_CONSTRUCT(&value, orte_gpr_value_t);
values = &value;
sub.values = &values;
sub.cnt = 1;
/* set the address mode to identify a specific container (in this case,
* the ORTE_JOB_GLOBALS container) and any keys within it
*/
value.addr_mode = ORTE_GPR_TOKENS_XAND | ORTE_GPR_KEYS_OR;
if (ORTE_SUCCESS != (rc = orte_schema.get_job_segment_name(&(value.segment), job))) {
if (ORTE_SUCCESS != (rc = orte_schema.get_job_segment_name(&segment, job))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&value);
sub.values = NULL;
OBJ_DESTRUCT(&sub);
return rc;
}
/* define the tokens for the container */
value.tokens = (char**)malloc(sizeof(char*));
if (NULL == value.tokens) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
OBJ_DESTRUCT(&value);
sub.values = NULL;
OBJ_DESTRUCT(&sub);
return ORTE_ERR_OUT_OF_RESOURCE;
}
value.tokens[0] = strdup(ORTE_JOB_GLOBALS); /* the counters are in the job's globals container */
value.num_tokens = 1;
/* define the keys to be returned */
value.cnt = 3;
value.keyvals = (orte_gpr_keyval_t**)malloc(value.cnt * sizeof(orte_gpr_keyval_t*));
if (NULL == value.keyvals) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
OBJ_DESTRUCT(&value);
sub.values = NULL;
OBJ_DESTRUCT(&sub);
return ORTE_ERR_OUT_OF_RESOURCE;
}
for (i=0; i < value.cnt; i++) {
value.keyvals[i] = OBJ_NEW(orte_gpr_keyval_t);
if (NULL == value.keyvals[i]) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
OBJ_DESTRUCT(&value);
sub.values = NULL;
OBJ_DESTRUCT(&sub);
return ORTE_ERR_OUT_OF_RESOURCE;
}
}
/* the 0th entry will be defined below */
value.keyvals[1]->key = strdup(ORTE_JOB_SLOTS_KEY);
value.keyvals[2]->key = strdup(ORTE_JOB_VPID_START_KEY);
/* we don't need to define the type and value for the keyvals - the subscribe
* function ignores those fields
*/
tokens[0] = strdup(ORTE_JOB_GLOBALS);
tokens[1] = NULL;
sub.cbfunc = orte_rmgr_base_proc_stage_gate_mgr;
sub.user_tag = NULL;
/*** TRIGGERS ***/
/* setup the trigger information - initialize the common elements */
OBJ_CONSTRUCT(&trig, orte_gpr_trigger_t);
/* we WILL name the trig - see explanation below. we do
* NOT assign the trigger id here - it is assigned for us when the
* registry "registers" the trigger and is returned in the
* trigger object at that time
*/
/*
* set the action to compare all specified counter levels. this will
* "fire" the trigger when all counters are equal
*/
trig.action = ORTE_GPR_TRIG_ALL_CMP;
/*
* setup the value object to define the data to be returned to us
*/
OBJ_CONSTRUCT(&trigvalue, orte_gpr_value_t);
trigvals = &trigvalue;
trig.values = &trigvals;
trig.cnt = 1;
/* set the address mode to identify a specific container (in this case,
* the ORTE_JOB_GLOBALS container) and any keys within it
*/
trigvalue.addr_mode = ORTE_GPR_TOKENS_XAND | ORTE_GPR_KEYS_OR;
if (ORTE_SUCCESS != (rc = orte_schema.get_job_segment_name(&(trigvalue.segment), job))) {
ORTE_ERROR_LOG(rc);
goto CLEANUP;
}
/* define the tokens for the container */
trigvalue.tokens = (char**)malloc(sizeof(char*));
if (NULL == trigvalue.tokens) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
rc = ORTE_ERR_OUT_OF_RESOURCE;
goto CLEANUP;
}
trigvalue.tokens[0] = strdup(ORTE_JOB_GLOBALS); /* the counters are in the job's globals container */
trigvalue.num_tokens = 1;
/* define the keys that identify the counters */
trigvalue.cnt = 2;
trigvalue.keyvals = (orte_gpr_keyval_t**)malloc(trigvalue.cnt * sizeof(orte_gpr_keyval_t*));
if (NULL == trigvalue.keyvals) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
rc = ORTE_ERR_OUT_OF_RESOURCE;
goto CLEANUP;
}
trigvalue.keyvals[0] = OBJ_NEW(orte_gpr_keyval_t);
if (NULL == trigvalue.keyvals[0]) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
rc = ORTE_ERR_OUT_OF_RESOURCE;
goto CLEANUP;
}
trigvalue.keyvals[1] = OBJ_NEW(orte_gpr_keyval_t);
if (NULL == trigvalue.keyvals[1]) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
rc = ORTE_ERR_OUT_OF_RESOURCE;
goto CLEANUP;
}
/* setup the triggers for the three main stage gates - these all compare
* their value to that in ORTE_JOB_SLOTS_KEY
*/
trigvalue.keyvals[0]->key = strdup(ORTE_JOB_SLOTS_KEY);
if (NULL == trigvalue.keyvals[0]->key) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
rc = ORTE_ERR_OUT_OF_RESOURCE;
goto CLEANUP;
}
/* we don't need to define the type and value for the keyvals - the subscribe
* function ignores those fields
*/
/* do the three stage gate subscriptions, plus the named triggers
* that compare their values to the JOB_SLOTS_KEY
*/
trig_keys[0] = strdup(ORTE_JOB_SLOTS_KEY);
for (i=0; i < num_named_trigs; i++) {
/*
* NOTE: we do NOT name the subscriptions here as these are not
* standard subscriptions that multiple processes should attach
* themselves to - the subscriptions only have meaning to the
* resource manager
*/
value.keyvals[0]->key = strdup(keys[i]);
if (NULL == value.keyvals[0]->key) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
rc = ORTE_ERR_OUT_OF_RESOURCE;
goto CLEANUP;
}
/*
* NOTE: we DO name the triggers as these will be standard triggers
* that multiple processes will want to attach themselves to - for
* example, a process may well want to receive some information when
* it reaches STAGE_GATE_1, and so will "attach" itself to that
* trigger as defined by us here
*/
if (ORTE_SUCCESS != (rc = orte_schema.get_std_trigger_name(&(trig.name),
trig_keys[1] = strdup(keys[i]);
if (ORTE_SUCCESS != (rc = orte_schema.get_std_trigger_name(&trig_name,
trig_names[i], job))) {
ORTE_ERROR_LOG(rc);
goto CLEANUP;
free(trig_keys[0]);
free(trig_keys[1]);
return rc;
}
trigvalue.keyvals[1]->key = strdup(keys[i]);
if (NULL == trigvalue.keyvals[1]->key) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
rc = ORTE_ERR_OUT_OF_RESOURCE;
goto CLEANUP;
if (ORTE_SUCCESS != (rc = orte_gpr.define_trigger(&id, trig_name,
ORTE_GPR_TRIG_INCLUDE_TRIG_CNTRS | ORTE_GPR_TRIG_ONE_SHOT |
ORTE_GPR_TRIG_ROUTE_DATA_THRU_ME | ORTE_GPR_TRIG_CMP_LEVELS,
ORTE_GPR_TOKENS_XAND | ORTE_GPR_KEYS_OR,
segment, tokens, 2, trig_keys,
orte_rmgr_base_proc_stage_gate_mgr, NULL))) {
ORTE_ERROR_LOG(rc);
free(trig_name);
free(trig_keys[0]);
free(trig_keys[1]);
return rc;
}
subs = &sub;
trigs = &trig;
rc = orte_gpr.subscribe(
1, &subs,
1, &trigs);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
goto CLEANUP;
}
free(value.keyvals[0]->key);
value.keyvals[0]->key = NULL;
free(trig.name);
free(trigvalue.keyvals[1]->key);
trigvalue.keyvals[1]->key = NULL;
free(trig_name);
free(trig_keys[1]);
}
free(trig_keys[0]);
/* Next, setup the trigger that watches the NUM_ABORTED counter to see if
* any process abnormally terminates - if so, then call the
* stage_gate_mgr_abort function
* so it can in turn order the job to be aborted
/* Now define the abort trigger. Again, only the trigger counter needs
* to be returned, so we don't need to setup a subscription to get
* other information
*/
sub.cbfunc = orte_rmgr_base_proc_stage_gate_mgr_abort;
value.keyvals[0]->key = strdup(ORTE_PROC_NUM_ABORTED);
if (NULL == value.keyvals[0]->key) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
rc = ORTE_ERR_OUT_OF_RESOURCE;
goto CLEANUP;
}
/* set the trigger name */
if (ORTE_SUCCESS != (rc = orte_schema.get_std_trigger_name(&(trig.name),
trig_keys[0] = strdup(ORTE_PROC_NUM_ABORTED);
if (ORTE_SUCCESS != (rc = orte_schema.get_std_trigger_name(&trig_name,
ORTE_NUM_ABORTED_TRIGGER, job))) {
ORTE_ERROR_LOG(rc);
goto CLEANUP;
return rc;
}
/* set the trigger action to fire at a specified level */
trig.action = ORTE_GPR_TRIG_ALL_AT;
/* cleanup the trigger keyvals that are no longer needed - we will
* rebuild them as required
*/
OBJ_RELEASE(trigvalue.keyvals[0]);
OBJ_RELEASE(trigvalue.keyvals[1]);
free(trigvalue.keyvals);
/* we only need one trigger keyval here as we are not comparing
* trigger levels - we are just asking to be notified when
* a specific counter changes value to "1"
*/
trigvalue.cnt = 1;
trigvalue.keyvals = (orte_gpr_keyval_t**)malloc(sizeof(orte_gpr_keyval_t**));
if (NULL == trigvalue.keyvals) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
rc = ORTE_ERR_OUT_OF_RESOURCE;
goto CLEANUP;
}
trigvalue.keyvals[0] = OBJ_NEW(orte_gpr_keyval_t);
if (NULL == trigvalue.keyvals[0]) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
rc = ORTE_ERR_OUT_OF_RESOURCE;
goto CLEANUP;
}
trigvalue.keyvals[0]->key = strdup(ORTE_PROC_NUM_ABORTED);
if (NULL == trigvalue.keyvals[0]->key) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
rc = ORTE_ERR_OUT_OF_RESOURCE;
goto CLEANUP;
}
/* trigger on the first process that aborts */
trigvalue.keyvals[0]->type = ORTE_SIZE;
trigvalue.keyvals[0]->value.size = 1;
subs = &sub;
trigs = &trig;
rc = orte_gpr.subscribe(
1, &subs,
1, &trigs);
if (ORTE_SUCCESS != rc) {
trig_level = 1;
if (ORTE_SUCCESS != (rc = orte_gpr.define_trigger_level(&id, trig_name,
ORTE_GPR_TRIG_INCLUDE_TRIG_CNTRS | ORTE_GPR_TRIG_ONE_SHOT |
ORTE_GPR_TRIG_AT_LEVEL,
ORTE_GPR_TOKENS_XAND | ORTE_GPR_KEYS_OR,
segment, tokens, 1, trig_keys, &trig_level,
orte_rmgr_base_proc_stage_gate_mgr_abort, NULL))) {
ORTE_ERROR_LOG(rc);
goto CLEANUP;
free(trig_name);
free(trig_keys[0]);
return rc;
}
free(trig_name);
free(trig_keys[0]);
/* set the job state to "launched" */
if (ORTE_SUCCESS != (rc = orte_soh.set_job_soh(job, ORTE_JOB_STATE_LAUNCHED))) {
ORTE_ERROR_LOG(rc);
}
CLEANUP:
OBJ_DESTRUCT(&trigvalue);
trig.values = NULL;
OBJ_DESTRUCT(&trig);
OBJ_DESTRUCT(&value);
sub.values = NULL;
OBJ_DESTRUCT(&sub);
return rc;
}
void orte_rmgr_base_proc_stage_gate_mgr(orte_gpr_notify_data_t *data,
void *user_tag)
int orte_rmgr_base_proc_stage_gate_mgr(orte_gpr_notify_message_t *msg)
{
orte_gpr_value_t **values;
orte_gpr_keyval_t **kvals;
orte_buffer_t buffer;
orte_process_name_t *recipients=NULL;
size_t i, j, m, n=0;
orte_vpid_t k=0;
size_t n=0;
int rc;
bool found_slots=false, found_start=false;
bool found_stg1=false, found_stg2=false;
bool found_stg3=false, found_finalized=false;
orte_buffer_t msg;
orte_jobid_t job;
char **tokens=NULL;
size_t num_tokens;
values = (orte_gpr_value_t**)(data->values)->addr;
/* check to see if this came from terminate. If so, we ignore it because
* that stage gate does NOT set an xcast barrier - processes simply
* record their state and continue processing
*/
if (orte_schema.check_std_trigger_name(msg->target, ORTE_NUM_TERMINATED_TRIGGER)) {
return ORTE_SUCCESS;
}
/* get the jobid from the segment name
* we setup the stage gate triggers to return at least one value
* to us. we use that value to extract the jobid for the returned
* data
/* All stage gate triggers are named, so we can extract the jobid
* directly from the trigger name
*/
if (ORTE_SUCCESS != (rc = orte_schema.extract_jobid_from_std_trigger_name(&job, msg->target))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* need the list of peers for this job so we can send them the xcast.
* obtain this list from the name service's get_job_peers function
*/
if (ORTE_SUCCESS != (rc =
orte_schema.extract_jobid_from_segment_name(&job,
values[0]->segment))) {
if (ORTE_SUCCESS != (rc = orte_ns.get_job_peers(&recipients, &n, job))) {
ORTE_ERROR_LOG(rc);
return;
}
if (ORTE_SUCCESS != (rc = orte_schema.get_job_tokens(&tokens, &num_tokens, job))) {
ORTE_ERROR_LOG(rc);
return;
}
/* check to see if this came from one of the stage gates as opposed
* to either terminate or finalize - if the latter, we set the job
* state as appropriate and then return - no message needs to be
* sent to the processes themselves
*/
kvals = values[0]->keyvals;
for (i=0; i < values[0]->cnt; i++) {
if (0 == strcmp(kvals[i]->key, ORTE_PROC_NUM_TERMINATED)) {
if (ORTE_SUCCESS != (rc = orte_soh.set_job_soh(job, ORTE_JOB_STATE_TERMINATED))) {
ORTE_ERROR_LOG(rc);
}
goto CLEANUP;
}
}
/* value returned will contain the counter, which contains the number of
* procs in this job. We need to know which counter is included as this
* tells us the job state we have reached.
*/
for (i=0, m=0; m < data->cnt &&
i < (data->values)->size &&
(!found_slots || !found_start ||
(!found_stg1 && !found_stg2 && !found_stg3 && !found_finalized)); i++) {
if (NULL != values[i]) {
m++;
kvals = values[i]->keyvals;
/* check to see if ORTE_JOB_GLOBALS is the token */
if (NULL != values[i]->tokens &&
0 == strcmp(ORTE_JOB_GLOBALS, values[i]->tokens[0])) {
/* find the ORTE_JOB_SLOTS_KEY and the ORTE_JOB_VPID_START_KEY keyval */
for (j=0; j < values[i]->cnt &&
(!found_slots || !found_start ||
(!found_stg1 && !found_stg2 && !found_stg3 && !found_finalized)); j++) {
if (NULL != kvals[j] && !found_slots &&
0 == strcmp(ORTE_JOB_SLOTS_KEY, kvals[j]->key)) {
n = kvals[j]->value.size;
found_slots = true;
}
if (NULL != kvals[j] && !found_start &&
0 == strcmp(ORTE_JOB_VPID_START_KEY, kvals[j]->key)) {
k = kvals[j]->value.vpid;
found_start = true;
}
if (NULL != kvals[j] &&
0 == strcmp(ORTE_PROC_NUM_AT_STG1, kvals[j]->key)) {
found_stg1 = true;
} else if (NULL != kvals[j] &&
0 == strcmp(ORTE_PROC_NUM_AT_STG2, kvals[j]->key)) {
found_stg2 = true;
} else if (NULL != kvals[j] &&
0 == strcmp(ORTE_PROC_NUM_AT_STG3, kvals[j]->key)) {
found_stg3 = true;
} else if (NULL != kvals[j] &&
0 == strcmp(ORTE_PROC_NUM_FINALIZED, kvals[j]->key)) {
found_finalized = true;
}
}
}
}
}
if (!found_slots) {
ORTE_ERROR_LOG(ORTE_ERR_GPR_DATA_CORRUPT);
goto CLEANUP;
}
if (!found_start) {
ORTE_ERROR_LOG(ORTE_ERR_GPR_DATA_CORRUPT);
goto CLEANUP;
return rc;
}
/* set the job state to the appropriate level */
if (found_stg1) {
if (orte_schema.check_std_trigger_name(msg->target, ORTE_STG1_TRIGGER)) {
if (ORTE_SUCCESS != (rc = orte_soh.set_job_soh(job, ORTE_JOB_STATE_AT_STG1))) {
ORTE_ERROR_LOG(rc);
goto CLEANUP;
}
} else if (found_stg2) {
} else if (orte_schema.check_std_trigger_name(msg->target, ORTE_STG2_TRIGGER)) {
if (ORTE_SUCCESS != (rc = orte_soh.set_job_soh(job, ORTE_JOB_STATE_AT_STG2))) {
ORTE_ERROR_LOG(rc);
goto CLEANUP;
}
} else if (found_stg3) {
} else if (orte_schema.check_std_trigger_name(msg->target, ORTE_STG3_TRIGGER)) {
if (ORTE_SUCCESS != (rc = orte_soh.set_job_soh(job, ORTE_JOB_STATE_AT_STG3))) {
ORTE_ERROR_LOG(rc);
goto CLEANUP;
}
} else if (found_finalized) {
} else if (orte_schema.check_std_trigger_name(msg->target, ORTE_NUM_FINALIZED_TRIGGER)) {
if (ORTE_SUCCESS != (rc = orte_soh.set_job_soh(job, ORTE_JOB_STATE_FINALIZED))) {
ORTE_ERROR_LOG(rc);
goto CLEANUP;
}
}
/* now can generate the list of recipients */
recipients = (orte_process_name_t*)malloc(n * sizeof(orte_process_name_t));
for (i=0; i < n; i++) {
recipients[i].cellid = 0;
recipients[i].jobid = job;
recipients[i].vpid = (orte_vpid_t)(k + i);
}
/* for the purposes of the stage gate manager, we don't actually have
* to determine anything from the message. All we have to do is respond
* by sending an xcast to all processes. However, the buffer has to include
* at least one piece of data for the RML to function, so pack something
* meaningless.
/* set the message type to SUBSCRIPTION. When we give this to the processes, we want
* them to break the message down and deliver it to the various subsystems.
*/
msg->msg_type = ORTE_GPR_SUBSCRIPTION_MSG;
msg->id = ORTE_GPR_TRIGGER_ID_MAX;
OBJ_CONSTRUCT(&msg, orte_buffer_t);
if (ORTE_SUCCESS != (rc = orte_dps.pack(&msg, &job, 1, ORTE_JOBID))) {
/* need to pack the msg for sending */
OBJ_CONSTRUCT(&buffer, orte_buffer_t);
if (ORTE_SUCCESS != (rc = orte_dps.pack(&buffer, &msg, 1, ORTE_GPR_NOTIFY_MSG))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&msg);
OBJ_DESTRUCT(&buffer);
goto CLEANUP;
}
/* send the message */
if (ORTE_SUCCESS != (rc = orte_rml.xcast(orte_process_info.my_name, recipients,
n, &msg, NULL))) {
n, &buffer, NULL, NULL))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&msg);
goto CLEANUP;
}
OBJ_DESTRUCT(&msg);
OBJ_DESTRUCT(&buffer);
CLEANUP:
for (j=0; j < num_tokens; j++) {
free(tokens[j]);
tokens[j] = NULL;
}
if (NULL != tokens) free(tokens);
if (NULL != recipients) free(recipients);
return;
return rc;
}
void orte_rmgr_base_proc_stage_gate_mgr_abort(orte_gpr_notify_data_t *data,
void *user_tag)
int orte_rmgr_base_proc_stage_gate_mgr_abort(orte_gpr_notify_message_t *msg)
{
orte_gpr_value_t **values;
orte_jobid_t job;
int rc;
/* get the jobid from the segment name
* we setup the stage gate triggers to return at least one value
* to us. we use that value to extract the jobid for the returned
* data
*/
values = (orte_gpr_value_t**)(data->values)->addr;
if (ORTE_SUCCESS != (rc =
orte_schema.extract_jobid_from_segment_name(&job,
values[0]->segment))) {
ORTE_ERROR_LOG(rc);
return;
}
/* All stage gate triggers are named, so we can extract the jobid
* directly from the trigger name
*/
if (ORTE_SUCCESS != (rc = orte_schema.extract_jobid_from_std_trigger_name(&job, msg->target))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* set the job status to "aborted" */
@ -595,27 +282,30 @@ void orte_rmgr_base_proc_stage_gate_mgr_abort(orte_gpr_notify_data_t *data,
}
orte_errmgr.incomplete_start(job);
return ORTE_SUCCESS;
}
/*
* Routine that subscribes to events on all counters.
* Routine that tools such as orterun can use to subscribe
* to events on all counters.
*/
int orte_rmgr_base_proc_stage_gate_subscribe(orte_jobid_t job, orte_gpr_notify_cb_fn_t cbfunc, void* cbdata)
{
size_t i;
int rc;
orte_gpr_value_t value, *values;
orte_gpr_trigger_t trig, *trigs;
orte_gpr_subscription_t sub, *subs;
char *segment, *trig_name, *tokens[2];
orte_gpr_subscription_id_t id;
char* keys[] = {
/* changes to this ordering need to be reflected in code below */
ORTE_PROC_NUM_AT_STG1,
ORTE_PROC_NUM_AT_STG2,
ORTE_PROC_NUM_AT_STG3,
ORTE_PROC_NUM_FINALIZED,
ORTE_PROC_NUM_TERMINATED
ORTE_PROC_NUM_TERMINATED,
ORTE_PROC_NUM_ABORTED
};
char* trig_names[] = {
/* changes to this ordering need to be reflected in code below
@ -625,160 +315,45 @@ int orte_rmgr_base_proc_stage_gate_subscribe(orte_jobid_t job, orte_gpr_notify_c
ORTE_STG2_TRIGGER,
ORTE_STG3_TRIGGER,
ORTE_NUM_FINALIZED_TRIGGER,
ORTE_NUM_TERMINATED_TRIGGER
ORTE_NUM_TERMINATED_TRIGGER,
ORTE_NUM_ABORTED_TRIGGER
};
size_t num_counters = sizeof(keys)/sizeof(keys[0]);
/*** SUBSCRIPTIONS ***/
/* the subscription object is used to define the values we want
* returned to us. we'll enter the precise data
* keys when we are ready to register the subscription - for now,
* do all the basic stuff
*/
OBJ_CONSTRUCT(&sub, orte_gpr_subscription_t);
/* we do not name the subscription - see explanation below. also, we do
* not assign the subscription id here - it is assigned for us when the
* registry "registers" the subscription and is returned in the
* subscription object at that time
*/
/*
* set the action to delete the subscription after the trigger fires. this
* subscription is solely for the purpose of returning stagegate information
* to the resource manager - we don't need it after that happens
*/
sub.action = ORTE_GPR_NOTIFY_DELETE_AFTER_TRIG;
/*
* setup the value object to define the data to be returned to us
*/
OBJ_CONSTRUCT(&value, orte_gpr_value_t);
values = &value;
sub.values = &values;
sub.cnt = 1;
/* set the address mode to identify a specific container (in this case,
* the ORTE_JOB_GLOBALS container) and any keys within it
*/
value.addr_mode = ORTE_GPR_TOKENS_XAND | ORTE_GPR_KEYS_OR;
if (ORTE_SUCCESS != (rc = orte_schema.get_job_segment_name(&(value.segment), job))) {
/* identify the segment for this job */
if (ORTE_SUCCESS != (rc = orte_schema.get_job_segment_name(&segment, job))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&value);
sub.values = NULL;
OBJ_DESTRUCT(&sub);
return rc;
}
/* define the tokens for the container */
value.tokens = (char**)malloc(sizeof(char*));
if (NULL == value.tokens) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
OBJ_DESTRUCT(&value);
sub.values = NULL;
OBJ_DESTRUCT(&sub);
return ORTE_ERR_OUT_OF_RESOURCE;
}
value.tokens[0] = strdup(ORTE_JOB_GLOBALS); /* the counters are in the job's globals container */
value.num_tokens = 1;
/* the keys describing the data to be returned will be defined later
* for now, we simply allocate the space
*/
value.keyvals = (orte_gpr_keyval_t**)malloc(sizeof(orte_gpr_keyval_t*));
if (NULL == value.keyvals) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
OBJ_DESTRUCT(&value);
sub.values = NULL;
OBJ_DESTRUCT(&sub);
return ORTE_ERR_OUT_OF_RESOURCE;
}
value.keyvals[0] = OBJ_NEW(orte_gpr_keyval_t);
if (NULL == value.keyvals[0]) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
OBJ_DESTRUCT(&value);
sub.values = NULL;
OBJ_DESTRUCT(&sub);
return ORTE_ERR_OUT_OF_RESOURCE;
}
value.cnt = 1;
/* define the callback and associated data tag */
sub.cbfunc = cbfunc;
sub.user_tag = cbdata;
/*** TRIGGERS ***/
/* setup the trigger information - initialize the common elements */
OBJ_CONSTRUCT(&trig, orte_gpr_trigger_t);
/* since the named triggers have already been defined, we don't need
* to replicate that here! all we need to do is refer to the
* proper trigger name - we'll do that below
*/
trig.action = ORTE_GPR_TRIG_ALL_CMP;
/* setup the tokens */
tokens[0]=ORTE_JOB_GLOBALS;
tokens[1]=NULL;
/* do the trigger subscriptions */
for (i=0; i < num_counters; i++) {
/* insert the subscription key identifying the data to
* be returned from this trigger
*/
value.keyvals[0]->key = strdup(keys[i]);
if (NULL == value.keyvals[0]->key) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
rc = ORTE_ERR_OUT_OF_RESOURCE;
goto CLEANUP;
}
/* get the standard trigger name to which we are "attaching" */
if (ORTE_SUCCESS != (rc = orte_schema.get_std_trigger_name(&(trig.name),
trig_names[i], job))) {
/* attach ourselves to the appropriate standard trigger */
if (ORTE_SUCCESS !=
(rc = orte_schema.get_std_trigger_name(&trig_name, trig_names[i], job))) {
ORTE_ERROR_LOG(rc);
goto CLEANUP;
free(segment);
return rc;
}
subs = &sub;
trigs = &trig;
rc = orte_gpr.subscribe(
1, &subs,
1, &trigs);
if(ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
goto CLEANUP;
}
free(value.keyvals[0]->key);
value.keyvals[0]->key = NULL;
free(trig.name);
trig.name = NULL;
if (ORTE_SUCCESS != (rc = orte_gpr.subscribe_1(&id, trig_name, NULL,
ORTE_GPR_NOTIFY_DELETE_AFTER_TRIG,
ORTE_GPR_TOKENS_OR | ORTE_GPR_KEYS_OR,
segment, tokens, keys[i],
cbfunc, cbdata))) {
ORTE_ERROR_LOG(rc);
free(segment);
free(trig_name);
return rc;
}
free(trig_name);
}
free(segment);
/* Now do the abort trigger.
* setup the subscription to return the number aborted
*/
value.keyvals[0]->key = strdup(ORTE_PROC_NUM_ABORTED);
if (NULL == value.keyvals[0]->key) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
rc = ORTE_ERR_OUT_OF_RESOURCE;
goto CLEANUP;
}
/* set the trigger action */
trig.action = ORTE_GPR_TRIG_ALL_AT;
/* get the standard "abort" trigger name */
if (ORTE_SUCCESS != (rc = orte_schema.get_std_trigger_name(&(trig.name),
ORTE_NUM_ABORTED_TRIGGER, job))) {
ORTE_ERROR_LOG(rc);
goto CLEANUP;
}
subs = &sub;
trigs = &trig;
rc = orte_gpr.subscribe(
1, &subs,
1, &trigs);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
}
CLEANUP:
OBJ_DESTRUCT(&trig);
OBJ_DESTRUCT(&value);
sub.values = NULL;
OBJ_DESTRUCT(&sub);
return rc;
return ORTE_SUCCESS;
}

Просмотреть файл

@ -169,7 +169,7 @@ typedef int (*orte_rmgr_base_module_proc_stage_gate_init_fn_t)(orte_jobid_t job)
* usually, broadcasting a message to all processes in the job that allows them
* to proceed.
*/
typedef void (*orte_rmgr_base_module_proc_stage_gate_mgr_fn_t)(orte_gpr_notify_data_t *data, void *user_tag);
typedef int (*orte_rmgr_base_module_proc_stage_gate_mgr_fn_t)(orte_gpr_notify_message_t *msg);
/**
* Cleanup resources held by rmgr.

Просмотреть файл

@ -338,7 +338,8 @@ typedef int (*orte_rml_module_xcast_fn_t)(
orte_process_name_t* peers,
size_t num_peers,
orte_buffer_t* buffer,
orte_rml_buffer_callback_fn_t cbfunc);
orte_gpr_trigger_cb_fn_t cbfunc,
void *user_tag);
/*
* Initialization/Cleanup