1
1

Hey, sports fans!! Guess what??

Here's the huge registry check-in you've all been waiting for with bated breath. The revised version sends a single message to all processes at the various stage gates, thus making the startup much more scalable. I could provide you with all the tawdry details, but won't for now - you are welcome to ask, though, and I'll merrily bore your ears to tears.

In addition, the commit contains the following:

1. set the ignore properties on ompi/debuggers and orte/mca/pls/poe

2. Added simplified subscribe and put functions to the registry's API. I have also converted all of the ompi functions that registered subscriptions to the new API, and caught their associated puts as well.

In a follow-on commit, I'll be adding support for George's hetero arch registry subscription (wanted to get this one in first).

This commit was SVN r7118.
Этот коммит содержится в:
Ralph Castain 2005-09-01 01:07:30 +00:00
родитель 4ac2445c61
Коммит 96f4bb7a63
51 изменённых файлов: 3040 добавлений и 2494 удалений

Просмотреть файл

@ -104,9 +104,8 @@ static int set_f(int keyval, MPI_Fint value);
int ompi_attr_create_predefined(void) int ompi_attr_create_predefined(void)
{ {
int rc, ret; int rc, ret;
orte_gpr_trigger_t trig, *trig1; orte_gpr_subscription_id_t id;
orte_gpr_value_t value, *values; char *sub_name, *trig_name;
orte_gpr_subscription_t sub, *sub1;
orte_jobid_t job; orte_jobid_t job;
/* Create all the keyvals */ /* Create all the keyvals */
@ -177,99 +176,40 @@ int ompi_attr_create_predefined(void)
return rc; return rc;
} }
OBJ_CONSTRUCT(&sub, orte_gpr_subscription_t); /* indicate that this is a standard subscription. This indicates
/* indicate that this is a standard subscription. This indicates that the that the subscription will be common to all processes. Thus,
* subscription will be common to all processes. Thus, the resulting data the resulting data can be consolidated into a
* can be consolidated into a process-independent message and broadcast process-independent message and broadcast to all processes */
* to all processes if (ORTE_SUCCESS !=
*/ (rc = orte_schema.get_std_subscription_name(&sub_name,
if (ORTE_SUCCESS != (rc = orte_schema.get_std_subscription_name(&(sub.name),
OMPI_ATTRIBUTE_SUBSCRIPTION, job))) { OMPI_ATTRIBUTE_SUBSCRIPTION, job))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
return rc; return rc;
} }
/* send data when trigger fires, then delete -
* no need for further notifications
*/
sub.action = ORTE_GPR_NOTIFY_DELETE_AFTER_TRIG;
OBJ_CONSTRUCT(&value, orte_gpr_value_t); /* attach ourselves to the standard stage-1 trigger */
values = &value; if (ORTE_SUCCESS !=
sub.values = &values; (rc = orte_schema.get_std_trigger_name(&trig_name,
sub.cnt = 1;
value.addr_mode = ORTE_GPR_TOKENS_OR | ORTE_GPR_KEYS_OR;
value.segment = strdup(ORTE_NODE_SEGMENT);
if (NULL == value.segment) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
OBJ_DESTRUCT(&value);
sub.values = NULL;
OBJ_DESTRUCT(&sub);
return ORTE_ERR_OUT_OF_RESOURCE;
}
value.tokens = NULL; /* wildcard - look at all containers */
value.num_tokens = 0;
value.cnt = 1;
value.keyvals = (orte_gpr_keyval_t**)malloc(sizeof(orte_gpr_keyval_t*));
if (NULL == value.keyvals) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
OBJ_DESTRUCT(&value);
sub.values = NULL;
OBJ_DESTRUCT(&sub);
return ORTE_ERR_OUT_OF_RESOURCE;
}
value.keyvals[0] = OBJ_NEW(orte_gpr_keyval_t);
if (NULL == value.keyvals[0]) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
OBJ_DESTRUCT(&value);
sub.values = NULL;
OBJ_DESTRUCT(&sub);
return ORTE_ERR_OUT_OF_RESOURCE;
}
value.keyvals[0]->key = strdup(ORTE_NODE_SLOTS_KEY);
if (NULL == value.keyvals[0]->key) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
OBJ_DESTRUCT(&value);
sub.values = NULL;
OBJ_DESTRUCT(&sub);
return ORTE_ERR_OUT_OF_RESOURCE;
}
sub.cbfunc = ompi_attr_create_predefined_callback;
sub.user_tag = NULL;
/* setup the trigger information */
OBJ_CONSTRUCT(&trig, orte_gpr_trigger_t);
if (ORTE_SUCCESS != (rc = orte_schema.get_std_trigger_name(&(trig.name),
ORTE_STG1_TRIGGER, job))) { ORTE_STG1_TRIGGER, job))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&value); free(sub_name);
sub.values = NULL;
OBJ_DESTRUCT(&sub);
return rc; return rc;
} }
/* this is an ORTE-standard trigger that is defined by the ORTE resource manager if (ORTE_SUCCESS != (rc = orte_gpr.subscribe_1(&id, trig_name, sub_name,
* when the job was launched - therefore, we don't need to provide any additional ORTE_GPR_NOTIFY_DELETE_AFTER_TRIG,
* info ORTE_GPR_TOKENS_OR | ORTE_GPR_KEYS_OR,
*/ ORTE_NODE_SEGMENT,
NULL, /* wildcard - look at all containers */
/* do the subscription */ ORTE_NODE_SLOTS_KEY,
sub1 = &sub; ompi_attr_create_predefined_callback, NULL))) {
trig1 = &trig; ORTE_ERROR_LOG(rc);
rc = orte_gpr.subscribe(1, &sub1, 1, &trig1);
if(ORTE_SUCCESS != rc) {
opal_output(0, "ompi_attr_create_predefined: subscribe failed");
OBJ_DESTRUCT(&value);
sub.values = NULL;
OBJ_DESTRUCT(&sub);
OBJ_DESTRUCT(&trig);
return OMPI_ERROR;
} }
OBJ_DESTRUCT(&value); free(trig_name);
sub.values = NULL; free(sub_name);
OBJ_DESTRUCT(&sub);
OBJ_DESTRUCT(&trig); return rc;
return OMPI_SUCCESS;
} }

Просмотреть файл

@ -380,8 +380,8 @@ opal_output(0, "[%lu,%lu,%lu] mca_pml_base_modex_registry_callback: %s-%s-%d-%d
static int mca_pml_base_modex_subscribe(orte_process_name_t* name) static int mca_pml_base_modex_subscribe(orte_process_name_t* name)
{ {
orte_gpr_trigger_t trig, *trigs; char *segment, *sub_name, *trig_name;
orte_gpr_subscription_t sub, *subs; orte_gpr_subscription_id_t sub_id;
orte_jobid_t jobid; orte_jobid_t jobid;
opal_list_item_t* item; opal_list_item_t* item;
mca_pml_base_modex_subscription_t* subscription; mca_pml_base_modex_subscription_t* subscription;
@ -408,102 +408,48 @@ static int mca_pml_base_modex_subscribe(orte_process_name_t* name)
return rc; return rc;
} }
/* setup the subscription definition */ if (ORTE_SUCCESS != (rc = orte_schema.get_std_subscription_name(&sub_name,
OBJ_CONSTRUCT(&sub, orte_gpr_subscription_t);
/* indicate that this is a standard subscription. This indicates that the
* subscription will be common to all processes. Thus, the resulting data
* can be consolidated into a process-independent message and broadcast
* to all processes
*/
if (ORTE_SUCCESS != (rc = orte_schema.get_std_subscription_name(&(sub.name),
OMPI_MODEX_SUBSCRIPTION, jobid))) { OMPI_MODEX_SUBSCRIPTION, jobid))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
return rc; return rc;
} }
/* send data when trigger fires, continue to monitor. The default
* action for any subscription that includes a trigger condition is
* to send the specified data when the trigger fires. This set of flags
* indicates that - AFTER the trigger fires - the subscription should
* continue to send data any time an entry is added or changed.
*/
sub.action = ORTE_GPR_NOTIFY_ADD_ENTRY |
ORTE_GPR_NOTIFY_VALUE_CHG |
ORTE_GPR_NOTIFY_STARTS_AFTER_TRIG;
/* setup the value structures that describe the data to /* attach to the stage-1 standard trigger */
* be monitored and returned by this subscription if (ORTE_SUCCESS != (rc = orte_schema.get_std_trigger_name(&trig_name,
*/
sub.cnt = 1;
sub.values = (orte_gpr_value_t**)malloc(sizeof(orte_gpr_value_t*));
if (NULL == sub.values) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
sub.values[0] = OBJ_NEW(orte_gpr_value_t);
if (NULL == sub.values[0]) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
sub.cnt = 1;
/* define the segment */
if (ORTE_SUCCESS != (rc = orte_schema.get_job_segment_name(
&(sub.values[0]->segment), jobid))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&sub);
return rc;
}
sub.values[0]->addr_mode = ORTE_GPR_KEYS_OR | ORTE_GPR_TOKENS_OR;
/* look at all containers on this segment */
sub.values[0]->tokens = NULL;
sub.values[0]->num_tokens = 0;
/* look for any keyval with "modex" key */
sub.values[0]->cnt = 1;
sub.values[0]->keyvals = (orte_gpr_keyval_t**)malloc(sizeof(orte_gpr_keyval_t*));
if (NULL == sub.values[0]->keyvals) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
OBJ_DESTRUCT(&sub);
return ORTE_ERR_OUT_OF_RESOURCE;
}
sub.values[0]->keyvals[0] = OBJ_NEW(orte_gpr_keyval_t);
if (NULL == sub.values[0]->keyvals[0]) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
OBJ_DESTRUCT(&sub);
return ORTE_ERR_OUT_OF_RESOURCE;
}
sub.values[0]->keyvals[0]->key = strdup("modex");
if (NULL == sub.values[0]->keyvals[0]->key) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
OBJ_DESTRUCT(&sub);
return ORTE_ERR_OUT_OF_RESOURCE;
}
/* define the callback function */
sub.cbfunc = mca_pml_base_modex_registry_callback;
sub.user_tag = NULL;
/* setup the trigger definition */
OBJ_CONSTRUCT(&trig, orte_gpr_trigger_t);
if (ORTE_SUCCESS != (rc = orte_schema.get_std_trigger_name(&(trig.name),
ORTE_STG1_TRIGGER, jobid))) { ORTE_STG1_TRIGGER, jobid))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
free(sub_name);
return rc; return rc;
} }
/* this is an ORTE-standard trigger that is defined by the ORTE resource manager /* define the segment */
* when the job was launched - therefore, we don't need to provide any additional if (ORTE_SUCCESS != (rc = orte_schema.get_job_segment_name(&segment, jobid))) {
* info ORTE_ERROR_LOG(rc);
*/ free(sub_name);
free(trig_name);
return rc;
}
/* register the subscription */ if (ORTE_SUCCESS != (rc = orte_gpr.subscribe_1(&sub_id, trig_name, sub_name,
subs = &sub; ORTE_GPR_NOTIFY_ADD_ENTRY |
trigs = &trig; ORTE_GPR_NOTIFY_VALUE_CHG |
rc = orte_gpr.subscribe(1, &subs, 1, &trigs); ORTE_GPR_NOTIFY_STARTS_AFTER_TRIG,
if(ORTE_SUCCESS != rc) { ORTE_GPR_KEYS_OR | ORTE_GPR_TOKENS_OR,
segment,
NULL, /* look at all containers on this segment */
"modex",
mca_pml_base_modex_registry_callback, NULL))) {
ORTE_ERROR_LOG(rc);
opal_output(0, "mca_pml_base_modex_exchange: " opal_output(0, "mca_pml_base_modex_exchange: "
"orte_gpr.subscribe failed with return code %d\n", rc); "orte_gpr.subscribe failed with return code %d\n", rc);
OBJ_DESTRUCT(&sub); free(sub_name);
OBJ_DESTRUCT(&trig); free(trig_name);
return OMPI_ERROR; free(segment);
return rc;
} }
free(sub_name);
free(trig_name);
free(segment);
/* add this jobid to our list of subscriptions */ /* add this jobid to our list of subscriptions */
OPAL_LOCK(&mca_pml_base_modex_lock); OPAL_LOCK(&mca_pml_base_modex_lock);
@ -511,8 +457,6 @@ static int mca_pml_base_modex_subscribe(orte_process_name_t* name)
subscription->jobid = name->jobid; subscription->jobid = name->jobid;
opal_list_append(&mca_pml_base_modex_subscriptions, &subscription->item); opal_list_append(&mca_pml_base_modex_subscriptions, &subscription->item);
OPAL_UNLOCK(&mca_pml_base_modex_lock); OPAL_UNLOCK(&mca_pml_base_modex_lock);
OBJ_DESTRUCT(&sub);
OBJ_DESTRUCT(&trig);
return OMPI_SUCCESS; return OMPI_SUCCESS;
} }

Просмотреть файл

@ -25,6 +25,7 @@
#include "orte/mca/oob/oob.h" #include "orte/mca/oob/oob.h"
#include "orte/mca/ns/ns.h" #include "orte/mca/ns/ns.h"
#include "orte/mca/gpr/gpr.h" #include "orte/mca/gpr/gpr.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/util/proc_info.h" #include "orte/util/proc_info.h"
#include "ompi/proc/proc.h" #include "ompi/proc/proc.h"
#include "ompi/mca/pml/pml.h" #include "ompi/mca/pml/pml.h"
@ -336,15 +337,14 @@ int ompi_proc_get_proclist (orte_buffer_t* buf, int proclistsize, ompi_proc_t **
static int setup_registry_callback(void) static int setup_registry_callback(void)
{ {
int rc; int rc;
char *segment; char *segment, *sub_name, *trig_name, *keys[2];
ompi_proc_t *local = ompi_proc_local(); ompi_proc_t *local = ompi_proc_local();
orte_gpr_subscription_id_t id;
orte_jobid_t jobid; orte_jobid_t jobid;
orte_gpr_trigger_t trig, *trig1;
orte_gpr_value_t value, *values;
orte_gpr_subscription_t sub, *sub1;
if (ORTE_SUCCESS != orte_ns.get_jobid(&jobid, &local->proc_name)) { if (ORTE_SUCCESS != (rc = orte_ns.get_jobid(&jobid, &local->proc_name))) {
printf("Badness!\n"); ORTE_ERROR_LOG(rc);
return rc;
} }
/* find the job segment on the registry */ /* find the job segment on the registry */
@ -353,88 +353,52 @@ static int setup_registry_callback(void)
return rc; return rc;
} }
OBJ_CONSTRUCT(&sub, orte_gpr_subscription_t);
/* indicate that this is a standard subscription. This indicates /* indicate that this is a standard subscription. This indicates
that the subscription will be common to all processes. Thus, that the subscription will be common to all processes. Thus,
the resulting data can be consolidated into a the resulting data can be consolidated into a
process-independent message and broadcast to all processes */ process-independent message and broadcast to all processes */
if (ORTE_SUCCESS != if (ORTE_SUCCESS !=
(rc = orte_schema.get_std_subscription_name(&(sub.name), (rc = orte_schema.get_std_subscription_name(&sub_name,
OMPI_PROC_SUBSCRIPTION, jobid))) { OMPI_PROC_SUBSCRIPTION, jobid))) {
ORTE_ERROR_LOG(rc);
free(segment);
return rc; return rc;
} }
/* send data when trigger fires, then delete - no need for further /* define the keys to be returned */
notifications */ keys[0] = strdup(ORTE_PROC_NAME_KEY);
sub.action = ORTE_GPR_NOTIFY_DELETE_AFTER_TRIG; keys[1] = strdup(ORTE_NODE_NAME_KEY);
OBJ_CONSTRUCT(&value, orte_gpr_value_t);
values = &value;
sub.values = &values;
sub.cnt = 1;
value.addr_mode = ORTE_GPR_TOKENS_OR | ORTE_GPR_KEYS_OR;
value.segment = segment;
value.tokens = NULL; /* wildcard - look at all containers */
value.num_tokens = 0;
value.cnt = 2;
value.keyvals =
(orte_gpr_keyval_t**)malloc(sizeof(orte_gpr_keyval_t*) * 2);
if (NULL == value.keyvals) {
rc = ORTE_ERR_OUT_OF_RESOURCE;
goto cleanup;
}
value.keyvals[0] = NULL;
value.keyvals[1] = NULL;
value.keyvals[0] = OBJ_NEW(orte_gpr_keyval_t);
if (NULL == value.keyvals[0]) {
rc = ORTE_ERR_OUT_OF_RESOURCE;
goto cleanup;
}
value.keyvals[0]->key = strdup(ORTE_PROC_NAME_KEY);
if (NULL == value.keyvals[0]->key) {
rc = ORTE_ERR_OUT_OF_RESOURCE;
goto cleanup;
}
value.keyvals[1] = OBJ_NEW(orte_gpr_keyval_t);
if (NULL == value.keyvals[0]) {
rc = ORTE_ERR_OUT_OF_RESOURCE;
goto cleanup;
}
value.keyvals[1]->key = strdup(ORTE_NODE_NAME_KEY);
if (NULL == value.keyvals[0]->key) {
rc = ORTE_ERR_OUT_OF_RESOURCE;
goto cleanup;
}
/* Here we have to add another key to the registry to be able to get the information /* Here we have to add another key to the registry to be able to get the information
* about the remote architectures. * about the remote architectures.
* TODO: George. * TODO: George.
*/ */
sub.cbfunc = callback; /* attach ourselves to the standard stage-1 trigger */
sub.user_tag = NULL;
/* setup the trigger information */
OBJ_CONSTRUCT(&trig, orte_gpr_trigger_t);
if (ORTE_SUCCESS != if (ORTE_SUCCESS !=
(rc = orte_schema.get_std_trigger_name(&(trig.name), (rc = orte_schema.get_std_trigger_name(&trig_name,
ORTE_STG1_TRIGGER, jobid))) { ORTE_STG1_TRIGGER, jobid))) {
goto cleanup; ORTE_ERROR_LOG(rc);
goto CLEANUP;
} }
/* do the subscription */ if (ORTE_SUCCESS != (rc = orte_gpr.subscribe_N(&id, trig_name, sub_name,
sub1 = &sub; ORTE_GPR_NOTIFY_DELETE_AFTER_TRIG,
trig1 = &trig; ORTE_GPR_TOKENS_OR | ORTE_GPR_KEYS_OR,
rc = orte_gpr.subscribe(1, &sub1, 1, &trig1); segment,
NULL, /* wildcard - look at all containers */
2, keys,
callback, NULL))) {
ORTE_ERROR_LOG(rc);
}
free(trig_name);
CLEANUP:
free(segment);
free(sub_name);
free(keys[0]);
free(keys[1]);
cleanup:
OBJ_DESTRUCT(&value);
sub.values = NULL;
OBJ_DESTRUCT(&sub);
OBJ_DESTRUCT(&trig);
return rc; return rc;
} }

Просмотреть файл

@ -93,7 +93,8 @@ int ompi_mpi_finalize(void)
/* /*
* Wait for everyone to get here * Wait for everyone to get here
*/ */
if (ORTE_SUCCESS != (ret = orte_rml.xcast(NULL, NULL, 0, NULL, NULL))) { if (ORTE_SUCCESS != (ret = orte_rml.xcast(NULL, NULL, 0, NULL,
orte_gpr.deliver_notify_msg, NULL))) {
ORTE_ERROR_LOG(ret); ORTE_ERROR_LOG(ret);
return ret; return ret;
} }
@ -219,7 +220,8 @@ int ompi_mpi_finalize(void)
* the RTE while the soh is trying to do the update - which causes * the RTE while the soh is trying to do the update - which causes
* an ugly race condition * an ugly race condition
*/ */
if (ORTE_SUCCESS != (ret = orte_rml.xcast(NULL, NULL, 0, NULL, NULL))) { if (ORTE_SUCCESS != (ret = orte_rml.xcast(NULL, NULL, 0, NULL,
orte_gpr.deliver_notify_msg, NULL))) {
ORTE_ERROR_LOG(ret); ORTE_ERROR_LOG(ret);
return ret; return ret;
} }

Просмотреть файл

@ -179,7 +179,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
} }
} }
#ifndef WIN32 #if 0
if (OMPI_SUCCESS != (ret = opal_util_register_stackhandlers ())) { if (OMPI_SUCCESS != (ret = opal_util_register_stackhandlers ())) {
error = "util_register_stackhandlers() failed"; error = "util_register_stackhandlers() failed";
goto error; goto error;
@ -363,7 +363,8 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
} }
/* FIRST BARRIER - WAIT FOR MSG FROM RMGR_PROC_STAGE_GATE_MGR TO ARRIVE */ /* FIRST BARRIER - WAIT FOR MSG FROM RMGR_PROC_STAGE_GATE_MGR TO ARRIVE */
if (ORTE_SUCCESS != (ret = orte_rml.xcast(NULL, NULL, 0, NULL, NULL))) { if (ORTE_SUCCESS != (ret = orte_rml.xcast(NULL, NULL, 0, NULL,
orte_gpr.deliver_notify_msg, NULL))) {
ORTE_ERROR_LOG(ret); ORTE_ERROR_LOG(ret);
error = "ompi_mpi_init: failed to see all procs register\n"; error = "ompi_mpi_init: failed to see all procs register\n";
goto error; goto error;
@ -469,7 +470,8 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
/* Second barrier -- wait for message from /* Second barrier -- wait for message from
RMGR_PROC_STAGE_GATE_MGR to arrive */ RMGR_PROC_STAGE_GATE_MGR to arrive */
if (ORTE_SUCCESS != (ret = orte_rml.xcast(NULL, NULL, 0, NULL, NULL))) { if (ORTE_SUCCESS != (ret = orte_rml.xcast(NULL, NULL, 0, NULL,
orte_gpr.deliver_notify_msg, NULL))) {
ORTE_ERROR_LOG(ret); ORTE_ERROR_LOG(ret);
error = "ompi_mpi_init: failed to see all procs register\n"; error = "ompi_mpi_init: failed to see all procs register\n";
goto error; goto error;

Просмотреть файл

@ -90,12 +90,13 @@ typedef uint8_t orte_data_type_t ;
#define ORTE_GPR_TRIGGER (orte_data_type_t) 44 /**< describes trigger conditions */ #define ORTE_GPR_TRIGGER (orte_data_type_t) 44 /**< describes trigger conditions */
#define ORTE_GPR_NOTIFY_DATA (orte_data_type_t) 45 /**< data returned from a subscription */ #define ORTE_GPR_NOTIFY_DATA (orte_data_type_t) 45 /**< data returned from a subscription */
#define ORTE_GPR_NOTIFY_MSG (orte_data_type_t) 46 /**< notify message containing notify_data objects */ #define ORTE_GPR_NOTIFY_MSG (orte_data_type_t) 46 /**< notify message containing notify_data objects */
#define ORTE_GPR_NOTIFY_MSG_TYPE (orte_data_type_t) 47 /**< notify message type (subscription or trigger) */
/* Resource Manager types */ /* Resource Manager types */
#define ORTE_APP_CONTEXT (orte_data_type_t) 47 /**< argv and enviro arrays */ #define ORTE_APP_CONTEXT (orte_data_type_t) 48 /**< argv and enviro arrays */
#define ORTE_APP_CONTEXT_MAP (orte_data_type_t) 48 /**< application context mapping array */ #define ORTE_APP_CONTEXT_MAP (orte_data_type_t) 49 /**< application context mapping array */
/* define the starting point for dynamically assigning data types */ /* define the starting point for dynamically assigning data types */
#define ORTE_DPS_ID_DYNAMIC 50 #define ORTE_DPS_ID_DYNAMIC 60
/* define a structure to hold generic byte objects */ /* define a structure to hold generic byte objects */
typedef struct { typedef struct {
@ -103,12 +104,4 @@ typedef struct {
uint8_t *bytes; uint8_t *bytes;
} orte_byte_object_t; } orte_byte_object_t;
/* define a print format to handle the variations in pid_t */
#if SIZEOF_PID_T == SIZEOF_INT
#define ORTE_PID_T_PRINTF "%u"
#elif SIZEOF_PID_T == SIZEOF_LONG
#define ORTE_PID_T_PRINTF "%lu"
#endif
#endif #endif

Просмотреть файл

@ -105,6 +105,8 @@ extern "C" {
#define ORTE_GPR_COMPOUND_CMD (uint8_t) 18 #define ORTE_GPR_COMPOUND_CMD (uint8_t) 18
#define ORTE_GPR_CLEANUP_JOB_CMD (uint8_t) 19 #define ORTE_GPR_CLEANUP_JOB_CMD (uint8_t) 19
#define ORTE_GPR_CLEANUP_PROC_CMD (uint8_t) 20 #define ORTE_GPR_CLEANUP_PROC_CMD (uint8_t) 20
#define ORTE_GPR_DUMP_A_TRIGGER_CMD (uint8_t) 21
#define ORTE_GPR_DUMP_A_SUBSCRIPTION_CMD (uint8_t) 22
#define ORTE_GPR_ERROR (uint8_t)0xff #define ORTE_GPR_ERROR (uint8_t)0xff
typedef uint8_t orte_gpr_cmd_flag_t; typedef uint8_t orte_gpr_cmd_flag_t;
@ -162,6 +164,18 @@ typedef uint8_t orte_gpr_cmd_flag_t;
orte_gpr_trigger_cb_fn_t cbfunc, orte_gpr_trigger_cb_fn_t cbfunc,
void *user_tag); void *user_tag);
OMPI_DECLSPEC int orte_gpr_base_define_trigger_level(orte_gpr_trigger_id_t *id,
char *trig_name,
orte_gpr_trigger_action_t action,
orte_gpr_addr_mode_t addr_mode,
char *segment,
char **tokens,
size_t n,
char **keys,
size_t *levels,
orte_gpr_trigger_cb_fn_t cbfunc,
void *user_tag);
/* general usage functions */ /* general usage functions */
OMPI_DECLSPEC int orte_gpr_base_pack_delete_segment(orte_buffer_t *cmd, OMPI_DECLSPEC int orte_gpr_base_pack_delete_segment(orte_buffer_t *cmd,
char *segment); char *segment);
@ -202,8 +216,15 @@ typedef uint8_t orte_gpr_cmd_flag_t;
OMPI_DECLSPEC int orte_gpr_base_pack_dump_all(orte_buffer_t *cmd); OMPI_DECLSPEC int orte_gpr_base_pack_dump_all(orte_buffer_t *cmd);
OMPI_DECLSPEC int orte_gpr_base_pack_dump_segments(orte_buffer_t *cmd, char *segment); OMPI_DECLSPEC int orte_gpr_base_pack_dump_segments(orte_buffer_t *cmd, char *segment);
OMPI_DECLSPEC int orte_gpr_base_pack_dump_triggers(orte_buffer_t *cmd); OMPI_DECLSPEC int orte_gpr_base_pack_dump_triggers(orte_buffer_t *cmd,
OMPI_DECLSPEC int orte_gpr_base_pack_dump_subscriptions(orte_buffer_t *cmd); orte_gpr_trigger_id_t start);
OMPI_DECLSPEC int orte_gpr_base_pack_dump_subscriptions(orte_buffer_t *cmd,
orte_gpr_subscription_id_t start);
OMPI_DECLSPEC int orte_gpr_base_pack_dump_a_trigger(orte_buffer_t *cmd,
char *name, orte_gpr_trigger_id_t id);
OMPI_DECLSPEC int orte_gpr_base_pack_dump_a_subscription(orte_buffer_t *cmd,
char *name,
orte_gpr_subscription_id_t id);
OMPI_DECLSPEC int orte_gpr_base_pack_dump_callbacks(orte_buffer_t *cmd); OMPI_DECLSPEC int orte_gpr_base_pack_dump_callbacks(orte_buffer_t *cmd);
OMPI_DECLSPEC int orte_gpr_base_print_dump(orte_buffer_t *buffer, int output_id); OMPI_DECLSPEC int orte_gpr_base_print_dump(orte_buffer_t *buffer, int output_id);
OMPI_DECLSPEC void orte_gpr_base_dump_keyval_value(orte_buffer_t *buffer, OMPI_DECLSPEC void orte_gpr_base_dump_keyval_value(orte_buffer_t *buffer,
@ -246,6 +267,9 @@ int orte_gpr_base_pack_notify_action(orte_buffer_t *buffer, void *src,
int orte_gpr_base_pack_trigger_action(orte_buffer_t *buffer, void *src, int orte_gpr_base_pack_trigger_action(orte_buffer_t *buffer, void *src,
size_t num_vals, orte_data_type_t type); size_t num_vals, orte_data_type_t type);
int orte_gpr_base_pack_notify_msg_type(orte_buffer_t *buffer, void *src,
size_t num_vals, orte_data_type_t type);
int orte_gpr_base_pack_addr_mode(orte_buffer_t *buffer, void *src, int orte_gpr_base_pack_addr_mode(orte_buffer_t *buffer, void *src,
size_t num_vals, orte_data_type_t type); size_t num_vals, orte_data_type_t type);
@ -286,6 +310,9 @@ int orte_gpr_base_unpack_trigger_action(orte_buffer_t *buffer, void *dest,
int orte_gpr_base_unpack_addr_mode(orte_buffer_t *buffer, void *dest, int orte_gpr_base_unpack_addr_mode(orte_buffer_t *buffer, void *dest,
size_t *num_vals, orte_data_type_t type); size_t *num_vals, orte_data_type_t type);
int orte_gpr_base_unpack_notify_msg_type(orte_buffer_t *buffer, void *dest,
size_t *num_vals, orte_data_type_t type);
int orte_gpr_base_unpack_keyval(orte_buffer_t *buffer, void *dest, int orte_gpr_base_unpack_keyval(orte_buffer_t *buffer, void *dest,
size_t *num_vals, orte_data_type_t type); size_t *num_vals, orte_data_type_t type);

Просмотреть файл

@ -116,6 +116,22 @@ int orte_gpr_base_pack_addr_mode(orte_buffer_t *buffer, void *src,
return rc; return rc;
} }
/*
* NOTIFY MSG TYPE
*/
/**
 * Pack one or more notify-message-type values into a buffer.
 *
 * Forwards to orte_dps_pack_buffer() using ORTE_GPR_NOTIFY_MSG_TYPE_T as
 * the data type; the 'type' argument is accepted but not consulted here.
 *
 * @param buffer   Buffer handed to orte_dps_pack_buffer().
 * @param src      Pointer to the values to be packed.
 * @param num_vals Number of values referenced by src.
 * @param type     Data type tag (unused by this function).
 * @return The status returned by orte_dps_pack_buffer(); any status other
 *         than ORTE_SUCCESS is also reported through ORTE_ERROR_LOG.
 */
int orte_gpr_base_pack_notify_msg_type(orte_buffer_t *buffer, void *src,
                                       size_t num_vals, orte_data_type_t type)
{
    int status = orte_dps_pack_buffer(buffer, src, num_vals,
                                      ORTE_GPR_NOTIFY_MSG_TYPE_T);

    if (ORTE_SUCCESS == status) {
        return status;
    }

    /* report the failure, then hand the code back to the caller */
    ORTE_ERROR_LOG(status);
    return status;
}
/* /*
* KEYVAL * KEYVAL
*/ */
@ -347,7 +363,7 @@ int orte_gpr_base_pack_notify_data(orte_buffer_t *buffer, void *src,
/* pack the subscription name */ /* pack the subscription name */
if (ORTE_SUCCESS != (rc = orte_dps_pack_buffer(buffer, if (ORTE_SUCCESS != (rc = orte_dps_pack_buffer(buffer,
(void*)(&(data[i]->name)), 1, ORTE_STRING))) { (void*)(&(data[i]->target)), 1, ORTE_STRING))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
return rc; return rc;
} }
@ -410,9 +426,16 @@ int orte_gpr_base_pack_notify_msg(orte_buffer_t *buffer, void *src,
for (i=0; i<num_vals; i++) { for (i=0; i<num_vals; i++) {
/* pack the message type */
if (ORTE_SUCCESS != (rc = orte_dps_pack_buffer(buffer,
(void*)(&(msg[i]->msg_type)), 1, ORTE_GPR_NOTIFY_MSG_TYPE))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* pack the trigger name */ /* pack the trigger name */
if (ORTE_SUCCESS != (rc = orte_dps_pack_buffer(buffer, if (ORTE_SUCCESS != (rc = orte_dps_pack_buffer(buffer,
(void*)(&(msg[i]->name)), 1, ORTE_STRING))) { (void*)(&(msg[i]->target)), 1, ORTE_STRING))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
return rc; return rc;
} }

Просмотреть файл

@ -101,6 +101,21 @@ int orte_gpr_base_unpack_trigger_action(orte_buffer_t *buffer, void *dest,
return rc; return rc;
} }
/*
* NOTIFY MSG TYPE
*/
/**
 * Unpack one or more notify-message-type values from a buffer.
 *
 * Forwards to orte_dps_unpack_buffer() using ORTE_GPR_NOTIFY_MSG_TYPE_T as
 * the data type; the 'type' argument is accepted but not consulted here.
 *
 * @param buffer   Buffer handed to orte_dps_unpack_buffer().
 * @param dest     Destination for the unpacked values.
 * @param num_vals Value count, passed through by pointer to
 *                 orte_dps_unpack_buffer().
 * @param type     Data type tag (unused by this function).
 * @return The status returned by orte_dps_unpack_buffer(); any status other
 *         than ORTE_SUCCESS is also reported through ORTE_ERROR_LOG.
 */
int orte_gpr_base_unpack_notify_msg_type(orte_buffer_t *buffer, void *dest,
                                         size_t *num_vals, orte_data_type_t type)
{
    int status = orte_dps_unpack_buffer(buffer, dest, num_vals,
                                        ORTE_GPR_NOTIFY_MSG_TYPE_T);

    if (ORTE_SUCCESS == status) {
        return status;
    }

    /* report the failure, then hand the code back to the caller */
    ORTE_ERROR_LOG(status);
    return status;
}
/* /*
* ADDR MODE * ADDR MODE
*/ */
@ -408,7 +423,7 @@ int orte_gpr_base_unpack_notify_data(orte_buffer_t *buffer, void *dest,
} }
/* unpack the subscription name */ /* unpack the subscription name */
if (ORTE_SUCCESS != (rc = orte_dps_unpack_buffer(buffer, &(data[i]->name), if (ORTE_SUCCESS != (rc = orte_dps_unpack_buffer(buffer, &(data[i]->target),
&max_n, ORTE_STRING))) { &max_n, ORTE_STRING))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
return rc; return rc;
@ -479,8 +494,15 @@ int orte_gpr_base_unpack_notify_msg(orte_buffer_t *buffer, void *dest,
return ORTE_ERR_OUT_OF_RESOURCE; return ORTE_ERR_OUT_OF_RESOURCE;
} }
/* unpack the message type */
if (ORTE_SUCCESS != (rc = orte_dps_unpack_buffer(buffer, &(msg[i]->msg_type),
&max_n, ORTE_GPR_NOTIFY_MSG_TYPE))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* unpack the trigger name */ /* unpack the trigger name */
if (ORTE_SUCCESS != (rc = orte_dps_unpack_buffer(buffer, &(msg[i]->name), if (ORTE_SUCCESS != (rc = orte_dps_unpack_buffer(buffer, &(msg[i]->target),
&max_n, ORTE_STRING))) { &max_n, ORTE_STRING))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
return rc; return rc;

Просмотреть файл

@ -137,7 +137,7 @@ OBJ_CLASS_INSTANCE(
/* constructor - used to initialize state of registry value instance */ /* constructor - used to initialize state of registry value instance */
static void orte_gpr_notify_data_construct(orte_gpr_notify_data_t* ptr) static void orte_gpr_notify_data_construct(orte_gpr_notify_data_t* ptr)
{ {
ptr->name = NULL; ptr->target = NULL;
ptr->id = ORTE_GPR_SUBSCRIPTION_ID_MAX; ptr->id = ORTE_GPR_SUBSCRIPTION_ID_MAX;
ptr->remove = false; ptr->remove = false;
ptr->cnt = 0; ptr->cnt = 0;
@ -153,7 +153,7 @@ static void orte_gpr_notify_data_destructor(orte_gpr_notify_data_t* ptr)
size_t i, j; size_t i, j;
orte_gpr_value_t **values; orte_gpr_value_t **values;
if (NULL != ptr->name) free(ptr->name); if (NULL != ptr->target) free(ptr->target);
if (NULL != ptr->values) { if (NULL != ptr->values) {
values = (orte_gpr_value_t**)(ptr->values)->addr; values = (orte_gpr_value_t**)(ptr->values)->addr;
@ -251,7 +251,8 @@ OBJ_CLASS_INSTANCE(
/* constructor - used to initialize notify message instance */ /* constructor - used to initialize notify message instance */
static void orte_gpr_notify_message_construct(orte_gpr_notify_message_t* msg) static void orte_gpr_notify_message_construct(orte_gpr_notify_message_t* msg)
{ {
msg->name = NULL; msg->msg_type = 0;
msg->target = NULL;
msg->id = ORTE_GPR_TRIGGER_ID_MAX; msg->id = ORTE_GPR_TRIGGER_ID_MAX;
msg->remove = false; msg->remove = false;
msg->cnt = 0; msg->cnt = 0;
@ -266,7 +267,7 @@ static void orte_gpr_notify_message_destructor(orte_gpr_notify_message_t* msg)
size_t i, j; size_t i, j;
orte_gpr_notify_data_t **data; orte_gpr_notify_data_t **data;
if (NULL != msg->name) free(msg->name); if (NULL != msg->target) free(msg->target);
if (NULL != msg->data) { if (NULL != msg->data) {
data = (orte_gpr_notify_data_t**)(msg->data)->addr; data = (orte_gpr_notify_data_t**)(msg->data)->addr;
@ -373,6 +374,14 @@ int orte_gpr_base_open(void)
return rc; return rc;
} }
tmp = ORTE_GPR_NOTIFY_MSG_TYPE;
if (ORTE_SUCCESS != (rc = orte_dps.register_type(orte_gpr_base_pack_notify_msg_type,
orte_gpr_base_unpack_notify_msg_type,
"ORTE_GPR_NOTIFY_MSG_TYPE", &tmp))) {
ORTE_ERROR_LOG(rc);
return rc;
}
tmp = ORTE_GPR_ADDR_MODE; tmp = ORTE_GPR_ADDR_MODE;
if (ORTE_SUCCESS != (rc = orte_dps.register_type(orte_gpr_base_pack_addr_mode, if (ORTE_SUCCESS != (rc = orte_dps.register_type(orte_gpr_base_pack_addr_mode,
orte_gpr_base_unpack_addr_mode, orte_gpr_base_unpack_addr_mode,

Просмотреть файл

@ -57,6 +57,7 @@ int orte_gpr_base_subscribe_1(orte_gpr_subscription_id_t *id,
/* assemble the subscription object */ /* assemble the subscription object */
subs = &sub; subs = &sub;
sub.name = sub_name;
sub.action = action; sub.action = action;
sub.cnt = 1; sub.cnt = 1;
values = &value; values = &value;
@ -222,6 +223,16 @@ int orte_gpr_base_define_trigger(orte_gpr_trigger_id_t *id,
size_t i, j; size_t i, j;
int rc; int rc;
/* check for error - this function can only be used to define triggers
* that compare their values to each other. It cannot be used to define
* triggers that fire when reaching a specified value as there is no
* way to specify a trigger level within this API
*/
if (ORTE_GPR_TRIG_AT_LEVEL & action) {
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
return ORTE_ERR_BAD_PARAM;
}
/* assemble the trigger object */ /* assemble the trigger object */
trigs = &trig; trigs = &trig;
trig.name = trig_name; trig.name = trig_name;
@ -281,3 +292,95 @@ int orte_gpr_base_define_trigger(orte_gpr_trigger_id_t *id,
return rc; return rc;
} }
/**
 * Define a trigger that fires when the given keys reach specified levels.
 *
 * Builds a single-value trigger description on the stack that borrows the
 * caller's strings and arrays (nothing is copied), submits it through
 * orte_gpr.subscribe(), and then releases only the memory allocated here.
 *
 * @param id        Receives trig.id after the subscribe call.
 * @param trig_name Name stored in the trigger object (borrowed).
 * @param action    Trigger action flags; must not include
 *                  ORTE_GPR_TRIG_CMP_LEVELS.
 * @param addr_mode Addressing mode stored in the value description.
 * @param segment   Segment name (borrowed).
 * @param tokens    NULL, or a NULL-terminated array of tokens (borrowed).
 * @param n         Number of entries in keys and levels.
 * @param keys      Array of n key names (borrowed).
 * @param levels    Array of n levels, one per key.
 * @param cbfunc    Callback stored in the trigger object.
 * @param user_tag  Opaque pointer stored in the trigger object.
 * @return ORTE_ERR_BAD_PARAM if action contains ORTE_GPR_TRIG_CMP_LEVELS,
 *         ORTE_ERR_OUT_OF_RESOURCE on allocation failure, otherwise the
 *         status returned by orte_gpr.subscribe().
 */
int orte_gpr_base_define_trigger_level(orte_gpr_trigger_id_t *id,
                                       char *trig_name,
                                       orte_gpr_trigger_action_t action,
                                       orte_gpr_addr_mode_t addr_mode,
                                       char *segment,
                                       char **tokens,
                                       size_t n,
                                       char **keys,
                                       size_t *levels,
                                       orte_gpr_trigger_cb_fn_t cbfunc,
                                       void *user_tag)
{
    orte_gpr_value_t *values;
    orte_gpr_value_t value = { {OBJ_CLASS(opal_object_t),0},
                               ORTE_GPR_TOKENS_AND,
                               NULL, 0, NULL, 0, NULL };
    orte_gpr_trigger_t *trigs;
    orte_gpr_trigger_t trig = { {OBJ_CLASS(opal_object_t),0},
                                NULL, 0, 0, 0, NULL, 0, NULL };
    size_t i, j;
    int rc;

    /* check for error - this function can only be used to define triggers
     * that fire at a specified level. It cannot be used to define
     * triggers that compare their values to each other
     */
    if (ORTE_GPR_TRIG_CMP_LEVELS & action) {
        ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
        return ORTE_ERR_BAD_PARAM;
    }

    /* assemble the trigger object */
    trigs = &trig;
    trig.name = trig_name;
    trig.action = action;
    trig.cnt = 1;
    values = &value;
    trig.values = &values;
    trig.cbfunc = cbfunc;
    trig.user_tag = user_tag;

    value.addr_mode = addr_mode;
    value.segment = segment;
    value.cnt = n;
    value.keyvals = (orte_gpr_keyval_t**)malloc(n * sizeof(orte_gpr_keyval_t*));
    if (NULL == value.keyvals) {
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        return ORTE_ERR_OUT_OF_RESOURCE;
    }

    for (i=0; i < n; i++) {
        value.keyvals[i] = OBJ_NEW(orte_gpr_keyval_t);
        if (NULL == value.keyvals[i]) {
            ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
            /* The keyvals built so far point at the caller's key strings,
             * so they must not go through the object destructor. Release
             * them with free(), exactly as the normal cleanup path does.
             */
            for (j=0; j < i; j++) free(value.keyvals[j]);
            free(value.keyvals);
            return ORTE_ERR_OUT_OF_RESOURCE;
        }
        value.keyvals[i]->key = keys[i];
        /* NOTE(review): the entry is typed ORTE_SIZE but the level is
         * stored through the 'intval' union member - confirm which member
         * consumers of an ORTE_SIZE keyval actually read.
         */
        value.keyvals[i]->type = ORTE_SIZE;
        value.keyvals[i]->value.intval = levels[i];
    }

    value.tokens = tokens;
    /* must count the number of tokens */
    if (NULL == tokens) {
        value.num_tokens = 0;
    } else {
        for (i=0; NULL != tokens[i]; i++) {
            (value.num_tokens)++;
        }
    }

    /* send the trigger definition (no subscriptions, one trigger) */
    if (ORTE_SUCCESS != (rc = orte_gpr.subscribe(0, NULL, 1, &trigs))) {
        ORTE_ERROR_LOG(rc);
    }

    /* clean up memory - very carefully!
     * We can't use the object destructors because we didn't
     * copy input data fields into the objects. Thus, only
     * release the data that we explicitly allocated
     */
    for (i=0; i < n; i++) free(value.keyvals[i]);
    free(value.keyvals);

    /* return the trigger id */
    *id = trig.id;

    return rc;
}

Просмотреть файл

@ -54,6 +54,10 @@ int orte_gpr_base_xfer_payload(orte_gpr_value_union_t *dest,
dest->pid = src->pid; dest->pid = src->pid;
break; break;
case ORTE_INT:
dest->intval = src->intval;
break;
case ORTE_UINT8: case ORTE_UINT8:
dest->ui8 = src->ui8; dest->ui8 = src->ui8;
break; break;

Просмотреть файл

@ -60,22 +60,92 @@ int orte_gpr_base_pack_dump_segments(orte_buffer_t *cmd, char *segment)
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }
int orte_gpr_base_pack_dump_triggers(orte_buffer_t *cmd) int orte_gpr_base_pack_dump_triggers(orte_buffer_t *cmd, orte_gpr_trigger_id_t start)
{ {
orte_gpr_cmd_flag_t command; orte_gpr_cmd_flag_t command;
int rc;
command = ORTE_GPR_DUMP_TRIGGERS_CMD; command = ORTE_GPR_DUMP_TRIGGERS_CMD;
return orte_dps.pack(cmd, &command, 1, ORTE_GPR_CMD); if (ORTE_SUCCESS != (rc = orte_dps.pack(cmd, &command, 1, ORTE_GPR_CMD))) {
ORTE_ERROR_LOG(rc);
return rc;
}
if (ORTE_SUCCESS != (rc = orte_dps.pack(cmd, &start, 1, ORTE_GPR_TRIGGER_ID))) {
ORTE_ERROR_LOG(rc);
return rc;
}
return ORTE_SUCCESS;
} }
int orte_gpr_base_pack_dump_subscriptions(orte_buffer_t *cmd) int orte_gpr_base_pack_dump_subscriptions(orte_buffer_t *cmd, orte_gpr_subscription_id_t start)
{ {
orte_gpr_cmd_flag_t command; orte_gpr_cmd_flag_t command;
int rc;
command = ORTE_GPR_DUMP_SUBSCRIPTIONS_CMD; command = ORTE_GPR_DUMP_SUBSCRIPTIONS_CMD;
return orte_dps.pack(cmd, &command, 1, ORTE_GPR_CMD); if (ORTE_SUCCESS != (rc = orte_dps.pack(cmd, &command, 1, ORTE_GPR_CMD))) {
ORTE_ERROR_LOG(rc);
return rc;
}
if (ORTE_SUCCESS != (rc = orte_dps.pack(cmd, &start, 1, ORTE_GPR_SUBSCRIPTION_ID))) {
ORTE_ERROR_LOG(rc);
return rc;
}
return ORTE_SUCCESS;
}
/*
 * Pack a "dump a single trigger" request into the given buffer.
 *
 * The fields are packed in this order: the ORTE_GPR_DUMP_A_TRIGGER_CMD
 * command flag, the trigger name, then the trigger id.
 *
 * @param cmd   Buffer that receives the packed request.
 * @param name  Name of the trigger (packed as an ORTE_STRING).
 * @param id    Id of the trigger (packed as an ORTE_GPR_TRIGGER_ID).
 *
 * @retval ORTE_SUCCESS  All three fields were packed.
 * @retval (other)       The code returned by the first pack call that
 *                       failed; it is logged before being returned.
 */
int orte_gpr_base_pack_dump_a_trigger(orte_buffer_t *cmd, char *name, orte_gpr_trigger_id_t id)
{
    orte_gpr_cmd_flag_t flag = ORTE_GPR_DUMP_A_TRIGGER_CMD;
    int status;

    /* each later field is only packed if everything before it succeeded */
    status = orte_dps.pack(cmd, &flag, 1, ORTE_GPR_CMD);
    if (ORTE_SUCCESS == status) {
        status = orte_dps.pack(cmd, &name, 1, ORTE_STRING);
    }
    if (ORTE_SUCCESS == status) {
        status = orte_dps.pack(cmd, &id, 1, ORTE_GPR_TRIGGER_ID);
    }

    if (ORTE_SUCCESS != status) {
        ORTE_ERROR_LOG(status);
        return status;
    }
    return ORTE_SUCCESS;
}
/*
 * Pack a "dump a single subscription" request into the given buffer.
 *
 * The fields are packed in this order: the ORTE_GPR_DUMP_A_SUBSCRIPTION_CMD
 * command flag, the subscription name, then the subscription id.
 *
 * @param cmd   Buffer that receives the packed request.
 * @param name  Name of the subscription (packed as an ORTE_STRING).
 * @param id    Id of the subscription (packed as an ORTE_GPR_SUBSCRIPTION_ID).
 *
 * @retval ORTE_SUCCESS  All three fields were packed.
 * @retval (other)       The code returned by the first pack call that
 *                       failed; it is logged before being returned.
 */
int orte_gpr_base_pack_dump_a_subscription(orte_buffer_t *cmd, char *name,
                                           orte_gpr_subscription_id_t id)
{
    orte_gpr_cmd_flag_t flag = ORTE_GPR_DUMP_A_SUBSCRIPTION_CMD;
    int status;

    /* each later field is only packed if everything before it succeeded */
    status = orte_dps.pack(cmd, &flag, 1, ORTE_GPR_CMD);
    if (ORTE_SUCCESS == status) {
        status = orte_dps.pack(cmd, &name, 1, ORTE_STRING);
    }
    if (ORTE_SUCCESS == status) {
        status = orte_dps.pack(cmd, &id, 1, ORTE_GPR_SUBSCRIPTION_ID);
    }

    if (ORTE_SUCCESS != status) {
        ORTE_ERROR_LOG(status);
        return status;
    }
    return ORTE_SUCCESS;
}
int orte_gpr_base_pack_dump_callbacks(orte_buffer_t *cmd) int orte_gpr_base_pack_dump_callbacks(orte_buffer_t *cmd)

Просмотреть файл

@ -52,10 +52,17 @@ int orte_gpr_base_dump_notify_msg(orte_buffer_t *buffer,
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }
if (NULL == msg->name) { if (ORTE_GPR_TRIGGER_MSG == msg->msg_type) {
asprintf(&tmp_out, "\tTrigger name: NULL"); asprintf(&tmp_out, "TRIGGER message");
} else if (ORTE_GPR_SUBSCRIPTION_MSG == msg->msg_type) {
asprintf(&tmp_out, "SUBSCRIPTION message");
}
orte_gpr_base_dump_load_string(buffer, &tmp_out);
if (NULL == msg->target) {
asprintf(&tmp_out, "\tTrigger target: NULL");
} else { } else {
asprintf(&tmp_out, "\tTrigger name: %s", msg->name); asprintf(&tmp_out, "\tTrigger target: %s", msg->target);
} }
orte_gpr_base_dump_load_string(buffer, &tmp_out); orte_gpr_base_dump_load_string(buffer, &tmp_out);
@ -108,9 +115,9 @@ static void orte_gpr_base_dump_data(orte_buffer_t *buffer,
orte_gpr_value_t **values; orte_gpr_value_t **values;
size_t i, j; size_t i, j;
if (NULL != data->name) { if (NULL != data->target) {
asprintf(&tmp_out, "%lu values going to subscription name %s", asprintf(&tmp_out, "%lu values going to subscription target %s",
(unsigned long) data->cnt, data->name); (unsigned long) data->cnt, data->target);
} else { } else {
asprintf(&tmp_out, "%lu values going to subscription num %lu", asprintf(&tmp_out, "%lu values going to subscription num %lu",
(unsigned long) data->cnt, (unsigned long) data->id); (unsigned long) data->cnt, (unsigned long) data->id);
@ -259,7 +266,7 @@ void orte_gpr_base_dump_keyval_value(orte_buffer_t *buffer, orte_gpr_keyval_t *i
break; break;
case ORTE_PID: case ORTE_PID:
asprintf(&tmp_out, "\t\t\tData type: ORTE_PID:\tValue: " ORTE_PID_T_PRINTF, iptr->value.pid); asprintf(&tmp_out, "\t\t\tData type: ORTE_PID:\tValue: %lu", (unsigned long)iptr->value.pid);
orte_gpr_base_dump_load_string(buffer, &tmp_out); orte_gpr_base_dump_load_string(buffer, &tmp_out);
break; break;

Просмотреть файл

@ -501,6 +501,18 @@ typedef int (*orte_gpr_base_module_define_trigger_fn_t)(orte_gpr_trigger_id_t *i
orte_gpr_trigger_cb_fn_t cbfunc, orte_gpr_trigger_cb_fn_t cbfunc,
void *user_tag); void *user_tag);
/*
 * Function type for the registry module's define_trigger_level entry.
 *
 * NOTE(review): presumably keys and levels are parallel arrays of n
 * entries each, and *id receives the id assigned to the new trigger -
 * confirm against the implementing function.
 */
typedef int (*orte_gpr_base_module_define_trigger_level_fn_t)(orte_gpr_trigger_id_t *id,
char *trig_name,
orte_gpr_trigger_action_t action,
orte_gpr_addr_mode_t addr_mode,
char *segment,
char **tokens,
size_t n,
char **keys,
size_t *levels,
orte_gpr_trigger_cb_fn_t cbfunc,
void *user_tag);
/* /*
* Cancel a subscription. * Cancel a subscription.
* Once a subscription has been entered on the registry, a caller may choose to permanently * Once a subscription has been entered on the registry, a caller may choose to permanently
@ -556,9 +568,23 @@ typedef int (*orte_gpr_base_module_dump_all_fn_t)(int output_id);
typedef int (*orte_gpr_base_module_dump_segment_fn_t)(char *segment, int output_id); typedef int (*orte_gpr_base_module_dump_segment_fn_t)(char *segment, int output_id);
typedef int (*orte_gpr_base_module_dump_triggers_fn_t)(int output_id); typedef int (*orte_gpr_base_module_dump_triggers_fn_t)(
orte_gpr_trigger_id_t tail,
int output_id);
typedef int (*orte_gpr_base_module_dump_subscriptions_fn_t)(int output_id); typedef int (*orte_gpr_base_module_dump_subscriptions_fn_t)(
orte_gpr_subscription_id_t tail,
int output_id);
/*
 * Function type for dumping a single trigger, selected by name and id,
 * to the output stream identified by output_id.
 */
typedef int (*orte_gpr_base_module_dump_a_trigger_fn_t)(
char *name,
orte_gpr_trigger_id_t id,
int output_id);
/*
 * Function type for dumping a single subscription, selected by name and
 * id, to the output stream identified by output_id.
 */
typedef int (*orte_gpr_base_module_dump_a_subscription_fn_t)(
char *name,
orte_gpr_subscription_id_t id,
int output_id);
typedef int (*orte_gpr_base_module_dump_local_triggers_fn_t)(int output_id); typedef int (*orte_gpr_base_module_dump_local_triggers_fn_t)(int output_id);
@ -602,6 +628,19 @@ typedef int (*orte_gpr_base_module_decrement_value_fn_t)(orte_gpr_value_t *value
typedef int (*orte_gpr_base_module_xfer_payload_fn_t)(orte_gpr_value_union_t *dest, typedef int (*orte_gpr_base_module_xfer_payload_fn_t)(orte_gpr_value_union_t *dest,
orte_gpr_value_union_t *src, orte_data_type_t type); orte_gpr_value_union_t *src, orte_data_type_t type);
/* Deliver a notify message
 * To support the broadcast of stage gate messages that supply all subscribed
 * data in a single message, we have to provide an API that allows the xcast
 * to "inject" the message back into the registry's local delivery system.
 *
 * @param msg A pointer to the orte_gpr_notify_message_t object to be delivered.
 * Note that the calling program is responsible for releasing this object.
 *
 * @retval ORTE_SUCCESS The message was delivered.
 * @retval (other) An ORTE error code describing why delivery failed.
 */
typedef int (*orte_gpr_base_module_deliver_notify_msg_t)(orte_gpr_notify_message_t *msg);
/* /*
* Ver 1.0.0 * Ver 1.0.0
*/ */
@ -625,6 +664,7 @@ struct orte_gpr_base_module_1_0_0_t {
/* GENERAL OPERATIONS */ /* GENERAL OPERATIONS */
orte_gpr_base_module_preallocate_segment_fn_t preallocate_segment; orte_gpr_base_module_preallocate_segment_fn_t preallocate_segment;
orte_gpr_base_module_xfer_payload_fn_t xfer_payload; orte_gpr_base_module_xfer_payload_fn_t xfer_payload;
orte_gpr_base_module_deliver_notify_msg_t deliver_notify_msg;
/* ARITHMETIC OPERATIONS */ /* ARITHMETIC OPERATIONS */
orte_gpr_base_module_increment_value_fn_t increment_value; orte_gpr_base_module_increment_value_fn_t increment_value;
orte_gpr_base_module_decrement_value_fn_t decrement_value; orte_gpr_base_module_decrement_value_fn_t decrement_value;
@ -633,6 +673,7 @@ struct orte_gpr_base_module_1_0_0_t {
orte_gpr_base_module_subscribe_1_fn_t subscribe_1; orte_gpr_base_module_subscribe_1_fn_t subscribe_1;
orte_gpr_base_module_subscribe_N_fn_t subscribe_N; orte_gpr_base_module_subscribe_N_fn_t subscribe_N;
orte_gpr_base_module_define_trigger_fn_t define_trigger; orte_gpr_base_module_define_trigger_fn_t define_trigger;
orte_gpr_base_module_define_trigger_level_fn_t define_trigger_level;
orte_gpr_base_module_unsubscribe_fn_t unsubscribe; orte_gpr_base_module_unsubscribe_fn_t unsubscribe;
orte_gpr_base_module_cancel_trigger_fn_t cancel_trigger; orte_gpr_base_module_cancel_trigger_fn_t cancel_trigger;
/* COMPOUND COMMANDS */ /* COMPOUND COMMANDS */
@ -644,6 +685,8 @@ struct orte_gpr_base_module_1_0_0_t {
orte_gpr_base_module_dump_segment_fn_t dump_segment; orte_gpr_base_module_dump_segment_fn_t dump_segment;
orte_gpr_base_module_dump_triggers_fn_t dump_triggers; orte_gpr_base_module_dump_triggers_fn_t dump_triggers;
orte_gpr_base_module_dump_subscriptions_fn_t dump_subscriptions; orte_gpr_base_module_dump_subscriptions_fn_t dump_subscriptions;
orte_gpr_base_module_dump_a_trigger_fn_t dump_a_trigger;
orte_gpr_base_module_dump_a_subscription_fn_t dump_a_subscription;
orte_gpr_base_module_dump_local_triggers_fn_t dump_local_triggers; orte_gpr_base_module_dump_local_triggers_fn_t dump_local_triggers;
orte_gpr_base_module_dump_local_subscriptions_fn_t dump_local_subscriptions; orte_gpr_base_module_dump_local_subscriptions_fn_t dump_local_subscriptions;
orte_gpr_base_module_dump_callbacks_fn_t dump_callbacks; orte_gpr_base_module_dump_callbacks_fn_t dump_callbacks;

Просмотреть файл

@ -120,6 +120,7 @@ typedef union { /* shared storage for the value */
size_t size; size_t size;
bool tf_flag; bool tf_flag;
pid_t pid; pid_t pid;
int intval;
uint8_t ui8; uint8_t ui8;
uint16_t ui16; uint16_t ui16;
uint32_t ui32; uint32_t ui32;
@ -188,7 +189,7 @@ OMPI_DECLSPEC OBJ_CLASS_DECLARATION(orte_gpr_value_t);
*/ */
typedef struct { typedef struct {
opal_object_t super; /**< Makes this an object */ opal_object_t super; /**< Makes this an object */
char *name; /**< Name of the associated subscripton, if provided */ char *target; /**< Name of the associated subscripton, if provided */
orte_gpr_subscription_id_t id; /**< Number of the associated subscription */ orte_gpr_subscription_id_t id; /**< Number of the associated subscription */
bool remove; /**< Remove this subscription from recipient's tracker */ bool remove; /**< Remove this subscription from recipient's tracker */
size_t cnt; /**< Number of value objects returned, one per container */ size_t cnt; /**< Number of value objects returned, one per container */
@ -199,10 +200,17 @@ OMPI_DECLSPEC OBJ_CLASS_DECLARATION(orte_gpr_notify_data_t);
/** Return message for notify requests /** Return message for notify requests
*/ */
/* Kind of notify message: tells the receiver whether the message comes
 * from a trigger or from a subscription. */
typedef uint8_t orte_gpr_notify_msg_type_t;
/* Data type used when a message type value is packed or unpacked */
#define ORTE_GPR_NOTIFY_MSG_TYPE_T ORTE_UINT8
/* Message generated by a trigger */
#define ORTE_GPR_TRIGGER_MSG (orte_gpr_notify_msg_type_t)0x01
/* Message generated by a subscription */
#define ORTE_GPR_SUBSCRIPTION_MSG (orte_gpr_notify_msg_type_t)0x02
typedef struct { typedef struct {
opal_object_t super; /**< Make this an object */ opal_object_t super; /**< Make this an object */
char *name; /**< Name of the associated trigger, if provided */ orte_gpr_notify_msg_type_t msg_type; /**< trigger or subscription msg */
orte_gpr_trigger_id_t id; /**< trigger id, if message comes from trigger (ORTE_GPR_TRIGGER_ID_MAX otherwise) */ char *target; /**< Name of the associated trigger, if provided */
orte_gpr_trigger_id_t id; /**< trigger id, if message comes from trigger
(ORTE_GPR_TRIGGER_ID_MAX otherwise) */
bool remove; /**< Remove this trigger from recipient's tracker */ bool remove; /**< Remove this trigger from recipient's tracker */
size_t cnt; /**< number of data objects */ size_t cnt; /**< number of data objects */
orte_pointer_array_t *data; /**< Contiguous array of pointers to data objects */ orte_pointer_array_t *data; /**< Contiguous array of pointers to data objects */
@ -221,8 +229,10 @@ typedef void (*orte_gpr_notify_cb_fn_t)(orte_gpr_notify_data_t *notify_data, voi
* notify_msg = message containing multiple blocks of data provided by trigger * notify_msg = message containing multiple blocks of data provided by trigger
* *
* user_tag = whatever tag data the user provided when filing the subscription * user_tag = whatever tag data the user provided when filing the subscription
*
* Since this only takes place locally, we CAN get a status code from the callback!
*/ */
typedef void (*orte_gpr_trigger_cb_fn_t)(orte_gpr_notify_message_t *msg, void *user_tag); typedef int (*orte_gpr_trigger_cb_fn_t)(orte_gpr_notify_message_t *msg);
/** Structure for registering subscriptions /** Structure for registering subscriptions
* A request to be notified when certain events occur, or when counters reach specified * A request to be notified when certain events occur, or when counters reach specified

Просмотреть файл

@ -174,13 +174,13 @@ orte_gpr_null_dump_segments(char *segment, int output_id)
} }
static int static int
orte_gpr_null_dump_triggers(int output_id) orte_gpr_null_dump_triggers(orte_gpr_trigger_id_t start, int output_id)
{ {
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }
static int static int
orte_gpr_null_dump_subscriptions(int output_id) orte_gpr_null_dump_subscriptions(orte_gpr_subscription_id_t start, int output_id)
{ {
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }
@ -306,8 +306,41 @@ static int orte_gpr_null_define_trigger(orte_gpr_trigger_id_t *id,
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }
/*
 * Null-component version of define_trigger_level.
 *
 * Every argument is ignored and nothing is written through id; the call
 * simply reports success.
 */
static int
orte_gpr_null_define_trigger_level(orte_gpr_trigger_id_t *id,
                                   char *trig_name,
                                   orte_gpr_trigger_action_t action,
                                   orte_gpr_addr_mode_t addr_mode,
                                   char *segment,
                                   char **tokens,
                                   size_t n,
                                   char **keys,
                                   size_t *levels,
                                   orte_gpr_trigger_cb_fn_t cbfunc,
                                   void *user_tag)
{
    /* intentionally a no-op */
    return ORTE_SUCCESS;
}
/*
 * Null-component version of deliver_notify_msg.
 *
 * The message is neither inspected nor released; the call simply
 * reports success.
 */
static int
orte_gpr_null_deliver_notify_msg(orte_gpr_notify_message_t *msg)
{
    /* intentionally a no-op */
    return ORTE_SUCCESS;
}
/*
 * Null-component version of dump_a_trigger.
 *
 * All arguments are ignored and nothing is printed; the call simply
 * reports success.
 */
static int
orte_gpr_null_dump_a_trigger(char *name,
                             orte_gpr_trigger_id_t id,
                             int output_id)
{
    /* intentionally a no-op */
    return ORTE_SUCCESS;
}
/*
 * Null-component version of dump_a_subscription.
 *
 * All arguments are ignored and nothing is printed; the call simply
 * reports success.
 */
static int
orte_gpr_null_dump_a_subscription(char *name,
                                  orte_gpr_subscription_id_t id,
                                  int output_id)
{
    /* intentionally a no-op */
    return ORTE_SUCCESS;
}
/* /*
* setup the function pointers for the module * setup the function pointers for the module
*/ */
@ -331,6 +364,7 @@ orte_gpr_base_module_t orte_gpr_null_module = {
/* GENERAL OPERATIONS */ /* GENERAL OPERATIONS */
orte_gpr_null_preallocate_segment, orte_gpr_null_preallocate_segment,
orte_gpr_null_xfer_payload, orte_gpr_null_xfer_payload,
orte_gpr_null_deliver_notify_msg,
/* ARITHMETIC OPERATIONS */ /* ARITHMETIC OPERATIONS */
orte_gpr_null_increment_value, orte_gpr_null_increment_value,
orte_gpr_null_decrement_value, orte_gpr_null_decrement_value,
@ -339,6 +373,7 @@ orte_gpr_base_module_t orte_gpr_null_module = {
orte_gpr_null_subscribe_1, orte_gpr_null_subscribe_1,
orte_gpr_null_subscribe_N, orte_gpr_null_subscribe_N,
orte_gpr_null_define_trigger, orte_gpr_null_define_trigger,
orte_gpr_null_define_trigger_level,
orte_gpr_null_unsubscribe, orte_gpr_null_unsubscribe,
orte_gpr_null_cancel_trigger, orte_gpr_null_cancel_trigger,
/* COMPOUND COMMANDS */ /* COMPOUND COMMANDS */
@ -350,6 +385,8 @@ orte_gpr_base_module_t orte_gpr_null_module = {
orte_gpr_null_dump_segments, orte_gpr_null_dump_segments,
orte_gpr_null_dump_triggers, orte_gpr_null_dump_triggers,
orte_gpr_null_dump_subscriptions, orte_gpr_null_dump_subscriptions,
orte_gpr_null_dump_a_trigger,
orte_gpr_null_dump_a_subscription,
orte_gpr_null_dump_local_triggers, orte_gpr_null_dump_local_triggers,
orte_gpr_null_dump_local_subscriptions, orte_gpr_null_dump_local_subscriptions,
orte_gpr_null_dump_callbacks, orte_gpr_null_dump_callbacks,

Просмотреть файл

@ -19,16 +19,17 @@
include $(top_ompi_srcdir)/config/Makefile.options include $(top_ompi_srcdir)/config/Makefile.options
sources = \ sources = \
gpr_proxy_arithmetic_ops.c \
gpr_proxy_cleanup.c \
gpr_proxy_component.c \ gpr_proxy_component.c \
gpr_proxy_compound_cmd.c \ gpr_proxy_compound_cmd.c \
gpr_proxy_del_index.c \ gpr_proxy_del_index.c \
gpr_proxy_cleanup.c \ gpr_proxy_deliver_notify_msg.c \
gpr_proxy_dump.c \ gpr_proxy_dump.c \
gpr_proxy_dump_local_trigs_subs.c \ gpr_proxy_dump_local_trigs_subs.c \
gpr_proxy_general_operations.c \
gpr_proxy_internals.c \ gpr_proxy_internals.c \
gpr_proxy_put_get.c \ gpr_proxy_put_get.c \
gpr_proxy_general_operations.c \
gpr_proxy_arithmetic_ops.c \
gpr_proxy_subscribe.c \ gpr_proxy_subscribe.c \
gpr_proxy.h gpr_proxy.h

Просмотреть файл

@ -55,6 +55,7 @@ int orte_gpr_proxy_finalize(void);
typedef struct { typedef struct {
opal_object_t super; /**< Allows this to be an object */ opal_object_t super; /**< Allows this to be an object */
orte_gpr_subscription_id_t id; /**< id of this subscription */ orte_gpr_subscription_id_t id; /**< id of this subscription */
size_t index; /**< location of this subscription in array */
char *name; char *name;
orte_gpr_notify_cb_fn_t callback; /**< Function to be called for notificaiton */ orte_gpr_notify_cb_fn_t callback; /**< Function to be called for notificaiton */
void *user_tag; /**< User-provided tag for callback function */ void *user_tag; /**< User-provided tag for callback function */
@ -66,6 +67,7 @@ OBJ_CLASS_DECLARATION(orte_gpr_proxy_subscriber_t);
typedef struct { typedef struct {
opal_object_t super; /**< Allows this to be an object */ opal_object_t super; /**< Allows this to be an object */
orte_gpr_trigger_id_t id; /**< id of this trigger */ orte_gpr_trigger_id_t id; /**< id of this trigger */
size_t index; /**< location of this trigger in array */
char *name; char *name;
orte_gpr_trigger_cb_fn_t callback; /**< Function to be called for notification */ orte_gpr_trigger_cb_fn_t callback; /**< Function to be called for notification */
void *user_tag; /**< User-provided tag for callback function */ void *user_tag; /**< User-provided tag for callback function */
@ -177,9 +179,17 @@ int orte_gpr_proxy_dump_all(int output_id);
int orte_gpr_proxy_dump_segments(char *segment, int output_id); int orte_gpr_proxy_dump_segments(char *segment, int output_id);
int orte_gpr_proxy_dump_triggers(int output_id); int orte_gpr_proxy_dump_triggers(orte_gpr_trigger_id_t start, int output_id);
int orte_gpr_proxy_dump_subscriptions(int output_id); int orte_gpr_proxy_dump_subscriptions(orte_gpr_subscription_id_t start, int output_id);
int orte_gpr_proxy_dump_a_trigger(char *name,
orte_gpr_trigger_id_t id,
int output_id);
int orte_gpr_proxy_dump_a_subscription(char *name,
orte_gpr_subscription_id_t id,
int output_id);
int orte_gpr_proxy_dump_local_triggers(int output_id); int orte_gpr_proxy_dump_local_triggers(int output_id);
@ -198,6 +208,8 @@ int orte_gpr_proxy_dump_value(orte_gpr_value_t *value, int output_id);
*/ */
int orte_gpr_proxy_preallocate_segment(char *name, size_t num_slots); int orte_gpr_proxy_preallocate_segment(char *name, size_t num_slots);
int orte_gpr_proxy_deliver_notify_msg(orte_gpr_notify_message_t *msg);
/* /*
* Functions that interface to the replica * Functions that interface to the replica
*/ */
@ -214,14 +226,14 @@ int
orte_gpr_proxy_enter_subscription(size_t cnt, orte_gpr_subscription_t **subscriptions); orte_gpr_proxy_enter_subscription(size_t cnt, orte_gpr_subscription_t **subscriptions);
int int
orte_gpr_proxy_remove_subscription(orte_gpr_subscription_id_t id); orte_gpr_proxy_remove_subscription(orte_gpr_proxy_subscriber_t *sub);
int int
orte_gpr_proxy_enter_trigger(size_t cnt, orte_gpr_trigger_t **triggers); orte_gpr_proxy_enter_trigger(size_t cnt, orte_gpr_trigger_t **triggers);
int int
orte_gpr_proxy_remove_trigger(orte_gpr_trigger_id_t id); orte_gpr_proxy_remove_trigger(orte_gpr_proxy_trigger_t *trig);
#if defined(c_plusplus) || defined(__cplusplus) #if defined(c_plusplus) || defined(__cplusplus)
} }

Просмотреть файл

@ -84,6 +84,7 @@ static orte_gpr_base_module_t orte_gpr_proxy = {
/* GENERAL OPERATIONS */ /* GENERAL OPERATIONS */
orte_gpr_proxy_preallocate_segment, orte_gpr_proxy_preallocate_segment,
orte_gpr_base_xfer_payload, orte_gpr_base_xfer_payload,
orte_gpr_proxy_deliver_notify_msg,
/* ARITHMETIC OPERATIONS */ /* ARITHMETIC OPERATIONS */
orte_gpr_proxy_increment_value, orte_gpr_proxy_increment_value,
orte_gpr_proxy_decrement_value, orte_gpr_proxy_decrement_value,
@ -92,6 +93,7 @@ static orte_gpr_base_module_t orte_gpr_proxy = {
orte_gpr_base_subscribe_1, orte_gpr_base_subscribe_1,
orte_gpr_base_subscribe_N, orte_gpr_base_subscribe_N,
orte_gpr_base_define_trigger, orte_gpr_base_define_trigger,
orte_gpr_base_define_trigger_level,
orte_gpr_proxy_unsubscribe, orte_gpr_proxy_unsubscribe,
orte_gpr_proxy_cancel_trigger, orte_gpr_proxy_cancel_trigger,
/* COMPOUND COMMANDS */ /* COMPOUND COMMANDS */
@ -103,6 +105,8 @@ static orte_gpr_base_module_t orte_gpr_proxy = {
orte_gpr_proxy_dump_segments, orte_gpr_proxy_dump_segments,
orte_gpr_proxy_dump_triggers, orte_gpr_proxy_dump_triggers,
orte_gpr_proxy_dump_subscriptions, orte_gpr_proxy_dump_subscriptions,
orte_gpr_proxy_dump_a_trigger,
orte_gpr_proxy_dump_a_subscription,
orte_gpr_proxy_dump_local_triggers, orte_gpr_proxy_dump_local_triggers,
orte_gpr_proxy_dump_local_subscriptions, orte_gpr_proxy_dump_local_subscriptions,
orte_gpr_proxy_dump_callbacks, orte_gpr_proxy_dump_callbacks,
@ -321,17 +325,9 @@ void orte_gpr_proxy_notify_recv(int status, orte_process_name_t* sender,
{ {
orte_gpr_cmd_flag_t command; orte_gpr_cmd_flag_t command;
orte_gpr_notify_message_t *msg; orte_gpr_notify_message_t *msg;
orte_gpr_notify_data_t **data; size_t n;
orte_gpr_proxy_subscriber_t *sub;
orte_gpr_proxy_trigger_t *trig;
size_t i, n;
int rc; int rc;
if (orte_gpr_proxy_globals.debug) {
opal_output(0, "[%lu,%lu,%lu] gpr_proxy_notify_recv: received trigger message",
ORTE_NAME_ARGS(orte_process_info.my_name));
}
n = 1; n = 1;
if (ORTE_SUCCESS != (rc = orte_dps.unpack(buffer, &command, &n, ORTE_GPR_CMD))) { if (ORTE_SUCCESS != (rc = orte_dps.unpack(buffer, &command, &n, ORTE_GPR_CMD))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
@ -356,61 +352,16 @@ void orte_gpr_proxy_notify_recv(int status, orte_process_name_t* sender,
goto RETURN_ERROR; goto RETURN_ERROR;
} }
/* if the message trigger id is valid (i.e., it is set to /* process the message */
* something other than ORTE_GPR_TRIGGER_ID_MAX), then this if (ORTE_SUCCESS != (rc = orte_gpr_proxy_deliver_notify_msg(msg))) {
* is an aggregated message intended for a single receiver.
* In that case, look up the associated TRIGGER id and pass
* the entire message to that receiver.
*/
if (ORTE_GPR_TRIGGER_ID_MAX > msg->id) {
trig = (orte_gpr_proxy_globals.triggers)->addr[msg->id];
if (NULL == trig) {
ORTE_ERROR_LOG(ORTE_ERR_GPR_DATA_CORRUPT);
} else {
trig->callback(msg, sub->user_tag);
}
if (msg->remove) {
if (ORTE_SUCCESS != (rc = orte_gpr_proxy_remove_trigger(msg->id))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
}
}
OBJ_RELEASE(msg); OBJ_RELEASE(msg);
goto RETURN_ERROR; goto RETURN_ERROR;
} }
/* if the message trigger id was NOT valid, then we split the
* message into its component datagrams and send each of them
* separately to their rescpective subscriber.
*/
if (msg->cnt > 0) {
data = (orte_gpr_notify_data_t**)(msg->data)->addr;
for (i=0; i < msg->cnt; i++) {
/* for speed purposes, we take advantage here of
* our knowledge on how this pointer array was
* constructed - we know that it is contiguous
* and that there are no NULL gaps in it.
*/
/* process request */
if (data[i]->id > orte_gpr_proxy_globals.num_subs) {
ORTE_ERROR_LOG(ORTE_ERR_GPR_DATA_CORRUPT);
continue;
}
sub = (orte_gpr_proxy_globals.subscriptions)->addr[data[i]->id];
if (NULL == sub) {
ORTE_ERROR_LOG(ORTE_ERR_GPR_DATA_CORRUPT);
} else {
sub->callback(data[i], sub->user_tag);
}
if (data[i]->remove) {
if (ORTE_SUCCESS != (rc = orte_gpr_proxy_remove_subscription(data[i]->id))) {
ORTE_ERROR_LOG(rc);
}
}
}
/* release data */ /* release data */
OBJ_RELEASE(msg); OBJ_RELEASE(msg);
}
RETURN_ERROR: RETURN_ERROR:

Просмотреть файл

@ -44,46 +44,66 @@ int orte_gpr_proxy_deliver_notify_msg(orte_gpr_notify_message_t *msg)
{ {
orte_gpr_notify_data_t **data; orte_gpr_notify_data_t **data;
orte_gpr_proxy_subscriber_t **subs, *sub; orte_gpr_proxy_subscriber_t **subs, *sub;
orte_gpr_proxy_trigger_t *trig; orte_gpr_proxy_trigger_t *trig, **trigs;
size_t i, j, k; size_t i, j, k, n;
bool processed; bool processed;
int rc; int rc;
/* if the message trigger id is valid (i.e., it is set to /* we first have to check if the message is a trigger message - if so,
* something other than ORTE_GPR_TRIGGER_ID_MAX), then this * then the message is intended to be
* is an aggregated message intended for a single receiver. * sent as a single block to that trigger's callback function.
* In that case, look up the associated TRIGGER id and pass
* the entire message to that receiver.
*/ */
if (ORTE_GPR_TRIGGER_ID_MAX > msg->id) { if (ORTE_GPR_TRIGGER_MSG == msg->msg_type) {
trig = (orte_gpr_proxy_globals.triggers)->addr[msg->id]; trig = (orte_gpr_proxy_globals.triggers)->addr[msg->id];
if (NULL == trig) { if (NULL == trig) {
ORTE_ERROR_LOG(ORTE_ERR_GPR_DATA_CORRUPT); ORTE_ERROR_LOG(ORTE_ERR_GPR_DATA_CORRUPT);
opal_output(0, "Trigger id: %lu", (unsigned long)msg->id);
orte_gpr.dump_local_triggers(0);
return ORTE_ERR_GPR_DATA_CORRUPT; return ORTE_ERR_GPR_DATA_CORRUPT;
} else { } else {
trig->callback(msg); trig->callback(msg);
} }
if (msg->remove) { if (msg->remove) {
if (ORTE_SUCCESS != (rc = orte_gpr_proxy_remove_trigger(msg->id))) { /* remove the specified trigger from the local tracker */
trigs = (orte_gpr_proxy_trigger_t**)(orte_gpr_proxy_globals.triggers)->addr;
for (i=0, j=0; j < orte_gpr_proxy_globals.num_trigs &&
i < (orte_gpr_proxy_globals.triggers)->size; i++) {
if (NULL != trigs[i]){
j++;
if (msg->id == trigs[i]->id) {
if (ORTE_SUCCESS != (rc = orte_gpr_proxy_remove_trigger(trigs[i]))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
}
OPAL_THREAD_UNLOCK(&orte_gpr_proxy_globals.mutex);
return rc; return rc;
} }
} }
}
/* must not have been found - report error */
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
return ORTE_ERR_NOT_FOUND;
}
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }
/* if the message trigger id was NOT valid, then we split the /* get here if this wasn't a trigger message. Only other allowed message type
* message into its component datagrams and send each of them * is a subscription message - if that isn't the case, then we have corrupt
* separately to their respective subscriber. * data, so flag it and return
*/
if (ORTE_GPR_SUBSCRIPTION_MSG != msg->msg_type) {
ORTE_ERROR_LOG(ORTE_ERR_GPR_DATA_CORRUPT);
return ORTE_ERR_GPR_DATA_CORRUPT;
}
/* get here if we have a subscription message - i.e., the message should
* be broken into its component parts and delivered separately
* to the indicated subscribers
*/ */
data = (orte_gpr_notify_data_t**)(msg->data)->addr; data = (orte_gpr_notify_data_t**)(msg->data)->addr;
orte_gpr.dump_local_subscriptions(0); for (i=0, n=0; n < msg->cnt &&
for (i=0; i < msg->cnt; i++) { i < (msg->data)->size; i++) {
opal_output(0, "[%lu,%lu,%lu] Sub data id %lu", ORTE_NAME_ARGS(orte_process_info.my_name), (unsigned long)data[i]->id); if (NULL != data[i]) {
opal_output(0, "\tSub name %s", data[i]->target); n++;
if (ORTE_GPR_SUBSCRIPTION_ID_MAX != data[i]->id || NULL != data[i]->target) {
/* for each datagram in the message, we need to lookup /* for each datagram in the message, we need to lookup
* the associated subscription (could be specified by name or id) to find the correct * the associated subscription (could be specified by name or id) to find the correct
* callback function. Name specifications are given precedence over id. * callback function. Name specifications are given precedence over id.
@ -96,10 +116,15 @@ opal_output(0, "\tSub name %s", data[i]->target);
j < (orte_gpr_proxy_globals.subscriptions)->size; j++) { j < (orte_gpr_proxy_globals.subscriptions)->size; j++) {
if (NULL != subs[j]) { if (NULL != subs[j]) {
k++; k++;
if ((NULL != subs[j]->name && if (NULL != data[i]->target) {
NULL != data[i]->target && /* if target name provided, must use it */
0 == strcmp(data[i]->target, subs[j]->name)) || if (NULL != subs[j]->name &&
(data[i]->id == subs[j]->id)) { 0 == strcmp(data[i]->target, subs[j]->name)) {
sub = subs[j];
processed = true;
}
} else if (data[i]->id == subs[j]->id) {
/* otherwise, see if id's match */
sub = subs[j]; sub = subs[j];
processed = true; processed = true;
} }
@ -121,6 +146,8 @@ opal_output(0, "\tSub name %s", data[i]->target);
} }
} }
} }
}
}
/* all done */ /* all done */
return ORTE_SUCCESS; return ORTE_SUCCESS;

Просмотреть файл

@ -167,7 +167,7 @@ int orte_gpr_proxy_dump_segments(char *segment, int output_id)
return rc; return rc;
} }
int orte_gpr_proxy_dump_triggers(int output_id) int orte_gpr_proxy_dump_triggers(orte_gpr_trigger_id_t start, int output_id)
{ {
orte_gpr_cmd_flag_t command; orte_gpr_cmd_flag_t command;
orte_buffer_t *cmd; orte_buffer_t *cmd;
@ -176,7 +176,7 @@ int orte_gpr_proxy_dump_triggers(int output_id)
size_t n; size_t n;
if (orte_gpr_proxy_globals.compound_cmd_mode) { if (orte_gpr_proxy_globals.compound_cmd_mode) {
return orte_gpr_base_pack_dump_triggers(orte_gpr_proxy_globals.compound_cmd); return orte_gpr_base_pack_dump_triggers(orte_gpr_proxy_globals.compound_cmd, start);
} }
cmd = OBJ_NEW(orte_buffer_t); cmd = OBJ_NEW(orte_buffer_t);
@ -185,7 +185,7 @@ int orte_gpr_proxy_dump_triggers(int output_id)
return ORTE_ERR_OUT_OF_RESOURCE; return ORTE_ERR_OUT_OF_RESOURCE;
} }
if (ORTE_SUCCESS != (rc = orte_gpr_base_pack_dump_triggers(cmd))) { if (ORTE_SUCCESS != (rc = orte_gpr_base_pack_dump_triggers(cmd, start))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
OBJ_RELEASE(cmd); OBJ_RELEASE(cmd);
return rc; return rc;
@ -228,7 +228,7 @@ int orte_gpr_proxy_dump_triggers(int output_id)
return rc; return rc;
} }
int orte_gpr_proxy_dump_subscriptions(int output_id) int orte_gpr_proxy_dump_subscriptions(orte_gpr_subscription_id_t start, int output_id)
{ {
orte_gpr_cmd_flag_t command; orte_gpr_cmd_flag_t command;
orte_buffer_t *cmd; orte_buffer_t *cmd;
@ -237,7 +237,7 @@ int orte_gpr_proxy_dump_subscriptions(int output_id)
size_t n; size_t n;
if (orte_gpr_proxy_globals.compound_cmd_mode) { if (orte_gpr_proxy_globals.compound_cmd_mode) {
return orte_gpr_base_pack_dump_subscriptions(orte_gpr_proxy_globals.compound_cmd); return orte_gpr_base_pack_dump_subscriptions(orte_gpr_proxy_globals.compound_cmd, start);
} }
cmd = OBJ_NEW(orte_buffer_t); cmd = OBJ_NEW(orte_buffer_t);
@ -246,7 +246,7 @@ int orte_gpr_proxy_dump_subscriptions(int output_id)
return ORTE_ERR_OUT_OF_RESOURCE; return ORTE_ERR_OUT_OF_RESOURCE;
} }
if (ORTE_SUCCESS != (rc = orte_gpr_base_pack_dump_subscriptions(cmd))) { if (ORTE_SUCCESS != (rc = orte_gpr_base_pack_dump_subscriptions(cmd, start))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
OBJ_RELEASE(cmd); OBJ_RELEASE(cmd);
return rc; return rc;
@ -289,6 +289,134 @@ int orte_gpr_proxy_dump_subscriptions(int output_id)
return rc; return rc;
} }
/*
 * Ask the registry replica to dump a single trigger and print its reply.
 *
 * In compound-command mode the request is only packed into the pending
 * compound command and the pack result is returned. Otherwise the request
 * is sent to the replica, the reply is received, its leading command flag
 * is checked, and the remainder is handed to orte_gpr_base_print_dump().
 *
 * @param name       Name of the trigger to dump.
 * @param id         Id of the trigger to dump.
 * @param output_id  Output stream passed to orte_gpr_base_print_dump().
 *
 * @retval ORTE_SUCCESS              The reply was received and printed.
 * @retval ORTE_ERR_OUT_OF_RESOURCE  A buffer could not be allocated.
 * @retval ORTE_ERR_COMM_FAILURE     Send or receive failed, or the reply
 *                                   carried an unexpected command flag.
 * @retval (other)                   Error from packing, unpacking or printing.
 */
int orte_gpr_proxy_dump_a_trigger(char *name,
                                  orte_gpr_trigger_id_t id,
                                  int output_id)
{
    orte_gpr_cmd_flag_t command;
    orte_buffer_t *cmd;
    orte_buffer_t *answer;
    int rc;
    size_t n;

    if (orte_gpr_proxy_globals.compound_cmd_mode) {
        return orte_gpr_base_pack_dump_a_trigger(orte_gpr_proxy_globals.compound_cmd, name, id);
    }

    cmd = OBJ_NEW(orte_buffer_t);
    if (NULL == cmd) { /* got a problem */
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        return ORTE_ERR_OUT_OF_RESOURCE;
    }

    if (ORTE_SUCCESS != (rc = orte_gpr_base_pack_dump_a_trigger(cmd, name, id))) {
        ORTE_ERROR_LOG(rc);
        OBJ_RELEASE(cmd);
        return rc;
    }

    if (0 > orte_rml.send_buffer(orte_process_info.gpr_replica, cmd, ORTE_RML_TAG_GPR, 0)) {
        ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
        OBJ_RELEASE(cmd);   /* do not leak the request on a failed send */
        return ORTE_ERR_COMM_FAILURE;
    }
    /* The request buffer is ours and is not used again, so drop it here.
     * NOTE(review): assumes send_buffer does not keep a reference to cmd
     * after it returns - confirm against the RML interface.
     */
    OBJ_RELEASE(cmd);

    answer = OBJ_NEW(orte_buffer_t);
    if (NULL == answer) {
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        return ORTE_ERR_OUT_OF_RESOURCE;
    }

    if (0 > orte_rml.recv_buffer(orte_process_info.gpr_replica, answer, ORTE_RML_TAG_GPR)) {
        ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
        OBJ_RELEASE(answer);   /* do not leak the reply buffer on a failed receive */
        return ORTE_ERR_COMM_FAILURE;
    }

    n = 1;
    if (ORTE_SUCCESS != (rc = orte_dps.unpack(answer, &command, &n, ORTE_GPR_CMD))) {
        ORTE_ERROR_LOG(rc);
        OBJ_RELEASE(answer);
        return rc;
    }

    /* NOTE(review): the reply is compared with ORTE_GPR_DUMP_TRIGGERS_CMD,
     * while the request is packed by orte_gpr_base_pack_dump_a_trigger() -
     * confirm which flag the replica actually echoes for this request.
     */
    if (ORTE_GPR_DUMP_TRIGGERS_CMD != command) {
        ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
        OBJ_RELEASE(answer);
        return ORTE_ERR_COMM_FAILURE;
    }

    if (ORTE_SUCCESS != (rc = orte_gpr_base_print_dump(answer, output_id))) {
        ORTE_ERROR_LOG(rc);
    }

    OBJ_RELEASE(answer);
    return rc;
}
/**
 * Ask the GPR replica to dump a single subscription and print the reply.
 *
 * In compound command mode the request is only packed into the pending
 * compound command buffer and nothing is sent or printed here.
 *
 * @param name      Subscription name, packed into the request as given.
 * @param id        Subscription id, packed into the request as given.
 * @param output_id Output stream passed to orte_gpr_base_print_dump().
 *
 * @return ORTE_SUCCESS on success; ORTE_ERR_OUT_OF_RESOURCE if a buffer
 *         cannot be created; ORTE_ERR_COMM_FAILURE if the send/receive
 *         fails or the reply carries an unexpected command flag; otherwise
 *         the error code returned by the pack/unpack/print helper.
 */
int orte_gpr_proxy_dump_a_subscription(char *name,
                                       orte_gpr_subscription_id_t id,
                                       int output_id)
{
    orte_gpr_cmd_flag_t command;
    orte_buffer_t *cmd;
    orte_buffer_t *answer;
    int rc;
    size_t n;

    if (orte_gpr_proxy_globals.compound_cmd_mode) {
        return orte_gpr_base_pack_dump_a_subscription(orte_gpr_proxy_globals.compound_cmd, name, id);
    }

    cmd = OBJ_NEW(orte_buffer_t);
    if (NULL == cmd) { /* got a problem */
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        return ORTE_ERR_OUT_OF_RESOURCE;
    }

    if (ORTE_SUCCESS != (rc = orte_gpr_base_pack_dump_a_subscription(cmd, name, id))) {
        ORTE_ERROR_LOG(rc);
        OBJ_RELEASE(cmd);
        return rc;
    }

    /* NOTE(review): assumes send_buffer does not take ownership of cmd,
     * so the request buffer is released on both outcomes - confirm
     * against the RML interface.
     */
    if (0 > orte_rml.send_buffer(orte_process_info.gpr_replica, cmd, ORTE_RML_TAG_GPR, 0)) {
        ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
        OBJ_RELEASE(cmd);
        return ORTE_ERR_COMM_FAILURE;
    }
    OBJ_RELEASE(cmd);

    answer = OBJ_NEW(orte_buffer_t);
    if (NULL == answer) {
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        return ORTE_ERR_OUT_OF_RESOURCE;
    }

    if (0 > orte_rml.recv_buffer(orte_process_info.gpr_replica, answer, ORTE_RML_TAG_GPR)) {
        ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
        OBJ_RELEASE(answer);
        return ORTE_ERR_COMM_FAILURE;
    }

    n = 1;
    if (ORTE_SUCCESS != (rc = orte_dps.unpack(answer, &command, &n, ORTE_GPR_CMD))) {
        ORTE_ERROR_LOG(rc);
        OBJ_RELEASE(answer);
        return rc;
    }

    /* the replica's dump-a-subscription handler leads its answer with
     * ORTE_GPR_DUMP_A_SUBSCRIPTION_CMD, so that is the flag to expect here
     */
    if (ORTE_GPR_DUMP_A_SUBSCRIPTION_CMD != command) {
        ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
        OBJ_RELEASE(answer);
        return ORTE_ERR_COMM_FAILURE;
    }

    if (ORTE_SUCCESS != (rc = orte_gpr_base_print_dump(answer, output_id))) {
        ORTE_ERROR_LOG(rc);
    }

    OBJ_RELEASE(answer);
    return rc;
}
int orte_gpr_proxy_dump_callbacks(int output_id) int orte_gpr_proxy_dump_callbacks(int output_id)
{ {
orte_gpr_cmd_flag_t command; orte_gpr_cmd_flag_t command;

Просмотреть файл

@ -35,7 +35,7 @@ int
orte_gpr_proxy_enter_subscription(size_t cnt, orte_gpr_subscription_t **subscriptions) orte_gpr_proxy_enter_subscription(size_t cnt, orte_gpr_subscription_t **subscriptions)
{ {
orte_gpr_proxy_subscriber_t *sub; orte_gpr_proxy_subscriber_t *sub;
size_t i, id; size_t i;
for (i=0; i < cnt; i++) { for (i=0; i < cnt; i++) {
sub = OBJ_NEW(orte_gpr_proxy_subscriber_t); sub = OBJ_NEW(orte_gpr_proxy_subscriber_t);
@ -48,7 +48,7 @@ orte_gpr_proxy_enter_subscription(size_t cnt, orte_gpr_subscription_t **subscrip
} }
sub->callback = subscriptions[i]->cbfunc; sub->callback = subscriptions[i]->cbfunc;
sub->user_tag = subscriptions[i]->user_tag; sub->user_tag = subscriptions[i]->user_tag;
if (0 > orte_pointer_array_add(&id, orte_gpr_proxy_globals.subscriptions, sub)) { if (0 > orte_pointer_array_add(&sub->index, orte_gpr_proxy_globals.subscriptions, sub)) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE; return ORTE_ERR_OUT_OF_RESOURCE;
} }
@ -64,10 +64,45 @@ orte_gpr_proxy_enter_subscription(size_t cnt, orte_gpr_subscription_t **subscrip
int int
orte_gpr_proxy_enter_trigger(size_t cnt, orte_gpr_trigger_t **trigs) orte_gpr_proxy_enter_trigger(size_t cnt, orte_gpr_trigger_t **trigs)
{ {
orte_gpr_proxy_trigger_t *trig; orte_gpr_proxy_trigger_t *trig, **tptr;
size_t i, id; size_t i, j, k;
for (i=0; i < cnt; i++) { for (i=0; i < cnt; i++) {
/* If the provided trigger has a name, see if it already is on
* the local trigger list. If so, then check to see if we
* already defined a return point for it and/or if this trigger
* doesn't - in either of those two cases, we ignore the
* trigger and just use the existing entry
*/
if (NULL != trigs[i]->name) {
tptr = (orte_gpr_proxy_trigger_t**)(orte_gpr_proxy_globals.triggers)->addr;
for (j=0, k=0; k < orte_gpr_proxy_globals.num_trigs &&
j < (orte_gpr_proxy_globals.triggers)->size; j++) {
if (NULL != tptr[j]) {
k++;
if (0 == strcmp(tptr[j]->name, trigs[i]->name)) {
/* same name - trigger is already on list */
if (NULL != tptr[j]->callback || NULL == trigs[i]->cbfunc) {
/* ignore these cases */
trig = tptr[j];
goto MOVEON;
}
/* reach here only if the prior trigger didn't provide
* a callback and the new one does. In this
* case, we update the existing trigger callback and then
* move on
*/
tptr[j]->callback = trigs[i]->cbfunc;
trig = tptr[j];
goto MOVEON;
}
}
}
}
/* either the trigger doesn't have a name, OR it did, but it isn't
* already on the list - add it to the list now
*/
trig = OBJ_NEW(orte_gpr_proxy_trigger_t); trig = OBJ_NEW(orte_gpr_proxy_trigger_t);
if (NULL == trig) { if (NULL == trig) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
@ -89,13 +124,14 @@ orte_gpr_proxy_enter_trigger(size_t cnt, orte_gpr_trigger_t **trigs)
} }
trig->callback = trigs[i]->cbfunc; trig->callback = trigs[i]->cbfunc;
trig->user_tag = trigs[i]->user_tag; trig->user_tag = trigs[i]->user_tag;
if (0 > orte_pointer_array_add(&id, orte_gpr_proxy_globals.triggers, trig)) { if (0 > orte_pointer_array_add(&trig->index, orte_gpr_proxy_globals.triggers, trig)) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE; return ORTE_ERR_OUT_OF_RESOURCE;
} }
trig->id = orte_gpr_proxy_globals.num_trigs; trig->id = orte_gpr_proxy_globals.num_trigs;
trigs[i]->id = trig->id;
(orte_gpr_proxy_globals.num_trigs)++; (orte_gpr_proxy_globals.num_trigs)++;
MOVEON:
trigs[i]->id = trig->id;
} }
return ORTE_SUCCESS; return ORTE_SUCCESS;
@ -103,24 +139,36 @@ orte_gpr_proxy_enter_trigger(size_t cnt, orte_gpr_trigger_t **trigs)
int int
orte_gpr_proxy_remove_subscription(orte_gpr_subscription_id_t id) orte_gpr_proxy_remove_subscription(orte_gpr_proxy_subscriber_t *sub)
{ {
if (NULL != (orte_gpr_proxy_globals.subscriptions)->addr[id]) { size_t index;
OBJ_RELEASE((orte_gpr_proxy_globals.subscriptions)->addr[id]);
orte_pointer_array_set_item(orte_gpr_proxy_globals.subscriptions, (size_t)id, NULL); if (NULL == sub) {
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
return ORTE_ERR_BAD_PARAM;
} }
index = sub->index;
OBJ_RELEASE(sub);
orte_pointer_array_set_item(orte_gpr_proxy_globals.subscriptions, index, NULL);
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }
int int
orte_gpr_proxy_remove_trigger(orte_gpr_trigger_id_t id) orte_gpr_proxy_remove_trigger(orte_gpr_proxy_trigger_t *trig)
{ {
if (NULL != (orte_gpr_proxy_globals.triggers)->addr[id]) { size_t index;
OBJ_RELEASE((orte_gpr_proxy_globals.triggers)->addr[id]);
orte_pointer_array_set_item(orte_gpr_proxy_globals.triggers, (size_t)id, NULL); if (NULL == trig) {
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
return ORTE_ERR_BAD_PARAM;
} }
index = trig->index;
OBJ_RELEASE(trig);
orte_pointer_array_set_item(orte_gpr_proxy_globals.triggers, index, NULL);
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }

Просмотреть файл

@ -45,6 +45,7 @@ orte_gpr_proxy_subscribe(size_t num_subs,
{ {
orte_buffer_t *cmd; orte_buffer_t *cmd;
orte_buffer_t *answer; orte_buffer_t *answer;
orte_gpr_proxy_subscriber_t **subs;
int rc = ORTE_SUCCESS, ret; int rc = ORTE_SUCCESS, ret;
size_t i; size_t i;
@ -162,8 +163,10 @@ orte_gpr_proxy_subscribe(size_t num_subs,
* numbers are NOT re-used. * numbers are NOT re-used.
*/ */
ERROR: ERROR:
subs = (orte_gpr_proxy_subscriber_t**)(orte_gpr_proxy_globals.subscriptions)->addr;
for (i=0; i < num_subs; i++) { for (i=0; i < num_subs; i++) {
if (ORTE_SUCCESS != (rc = orte_gpr_proxy_remove_subscription(subscriptions[i]->id))) { /* find the subscription on the local tracker */
if (ORTE_SUCCESS != (rc = orte_gpr_proxy_remove_subscription(subs[subscriptions[i]->id]))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
OPAL_THREAD_UNLOCK(&orte_gpr_proxy_globals.mutex); OPAL_THREAD_UNLOCK(&orte_gpr_proxy_globals.mutex);
return rc; return rc;
@ -179,17 +182,33 @@ int orte_gpr_proxy_unsubscribe(orte_gpr_subscription_id_t sub_number)
{ {
orte_buffer_t *cmd; orte_buffer_t *cmd;
orte_buffer_t *answer; orte_buffer_t *answer;
orte_gpr_proxy_subscriber_t **subs;
size_t i, j;
int rc, ret; int rc, ret;
OPAL_THREAD_LOCK(&orte_gpr_proxy_globals.mutex); OPAL_THREAD_LOCK(&orte_gpr_proxy_globals.mutex);
/* remove the specified subscription from the local tracker */ /* remove the specified subscription from the local tracker */
if (ORTE_SUCCESS != (rc = orte_gpr_proxy_remove_subscription(sub_number))) { subs = (orte_gpr_proxy_subscriber_t**)(orte_gpr_proxy_globals.subscriptions)->addr;
for (i=0, j=0; j < orte_gpr_proxy_globals.num_subs &&
i < (orte_gpr_proxy_globals.subscriptions)->size; i++) {
if (NULL != subs[i]){
j++;
if (sub_number == subs[i]->id) {
if (ORTE_SUCCESS != (rc = orte_gpr_proxy_remove_subscription(subs[i]))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
OPAL_THREAD_UNLOCK(&orte_gpr_proxy_globals.mutex); OPAL_THREAD_UNLOCK(&orte_gpr_proxy_globals.mutex);
return rc; return rc;
} }
goto PROCESS;
}
}
}
/* must not have been found - report error */
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
return ORTE_ERR_NOT_FOUND;
PROCESS:
/* if in compound cmd mode, then just pack the command into /* if in compound cmd mode, then just pack the command into
* that buffer and return * that buffer and return
*/ */
@ -267,17 +286,33 @@ int orte_gpr_proxy_cancel_trigger(orte_gpr_trigger_id_t trig)
{ {
orte_buffer_t *cmd; orte_buffer_t *cmd;
orte_buffer_t *answer; orte_buffer_t *answer;
orte_gpr_proxy_trigger_t **trigs;
size_t i, j;
int rc, ret; int rc, ret;
OPAL_THREAD_LOCK(&orte_gpr_proxy_globals.mutex); OPAL_THREAD_LOCK(&orte_gpr_proxy_globals.mutex);
/* remove the specified trigger from the local tracker */ /* remove the specified trigger from the local tracker */
if (ORTE_SUCCESS != (rc = orte_gpr_proxy_remove_trigger(trig))) { trigs = (orte_gpr_proxy_trigger_t**)(orte_gpr_proxy_globals.triggers)->addr;
for (i=0, j=0; j < orte_gpr_proxy_globals.num_trigs &&
i < (orte_gpr_proxy_globals.triggers)->size; i++) {
if (NULL != trigs[i]){
j++;
if (trig == trigs[i]->id) {
if (ORTE_SUCCESS != (rc = orte_gpr_proxy_remove_trigger(trigs[i]))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
OPAL_THREAD_UNLOCK(&orte_gpr_proxy_globals.mutex); OPAL_THREAD_UNLOCK(&orte_gpr_proxy_globals.mutex);
return rc; return rc;
} }
goto PROCESS;
}
}
}
/* must not have been found - report error */
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
return ORTE_ERR_NOT_FOUND;
PROCESS:
/* if the compound cmd mode is on, pack the command into that buffer /* if the compound cmd mode is on, pack the command into that buffer
* and return * and return
*/ */

Просмотреть файл

@ -23,6 +23,7 @@ libmca_gpr_replica_api_la_SOURCES = \
gpr_replica_cleanup_api.c \ gpr_replica_cleanup_api.c \
gpr_replica_compound_cmd_api.c \ gpr_replica_compound_cmd_api.c \
gpr_replica_del_index_api.c \ gpr_replica_del_index_api.c \
gpr_replica_deliver_notify_msg_api.c \
gpr_replica_dump_api.c \ gpr_replica_dump_api.c \
gpr_replica_dump_local_trigs_subs_api.c \ gpr_replica_dump_local_trigs_subs_api.c \
gpr_replica_arithmetic_ops_api.c \ gpr_replica_arithmetic_ops_api.c \

Просмотреть файл

@ -129,9 +129,19 @@ int orte_gpr_replica_dump_all(int output_id);
int orte_gpr_replica_dump_segments(char *segment, int output_id); int orte_gpr_replica_dump_segments(char *segment, int output_id);
int orte_gpr_replica_dump_triggers(int output_id); int orte_gpr_replica_dump_triggers(orte_gpr_trigger_id_t start, int output_id);
int orte_gpr_replica_dump_subscriptions(int output_id); int orte_gpr_replica_dump_subscriptions(orte_gpr_subscription_id_t start, int output_id);
int orte_gpr_replica_dump_a_trigger(
char *name,
orte_gpr_trigger_id_t id,
int output_id);
int orte_gpr_replica_dump_a_subscription(
char *name,
orte_gpr_subscription_id_t id,
int output_id);
int orte_gpr_replica_dump_local_triggers(int output_id); int orte_gpr_replica_dump_local_triggers(int output_id);
@ -150,6 +160,8 @@ int orte_gpr_replica_dump_value(orte_gpr_value_t *value, int output_id);
*/ */
int orte_gpr_replica_preallocate_segment(char *name, size_t num_slots); int orte_gpr_replica_preallocate_segment(char *name, size_t num_slots);
int orte_gpr_replica_deliver_notify_msg(orte_gpr_notify_message_t *msg);
#if defined(c_plusplus) || defined(__cplusplus) #if defined(c_plusplus) || defined(__cplusplus)
} }
#endif #endif

Просмотреть файл

@ -35,16 +35,15 @@ int orte_gpr_replica_deliver_notify_msg(orte_gpr_notify_message_t *msg)
orte_gpr_notify_data_t **data; orte_gpr_notify_data_t **data;
orte_gpr_replica_local_trigger_t **local_trigs; orte_gpr_replica_local_trigger_t **local_trigs;
orte_gpr_replica_local_subscriber_t **local_subs, *sub; orte_gpr_replica_local_subscriber_t **local_subs, *sub;
size_t i, j, k; size_t i, j, k, n;
int rc; int rc;
bool processed; bool processed;
/* we first have to check the trigger id in the message. If that /* we first have to check if the message is a trigger message - if so,
* field is set to a valid value (i.e., one other than * then the message is intended to be
* ORTE_GPR_TRIGGER_ID_MAX), then the message is intended to be
* sent as a single block to that trigger's callback function. * sent as a single block to that trigger's callback function.
*/ */
if (ORTE_GPR_TRIGGER_ID_MAX > msg->id) { if (ORTE_GPR_TRIGGER_MSG == msg->msg_type) {
/* use the local trigger callback */ /* use the local trigger callback */
local_trigs = (orte_gpr_replica_local_trigger_t**) local_trigs = (orte_gpr_replica_local_trigger_t**)
(orte_gpr_replica_globals.local_triggers)->addr; (orte_gpr_replica_globals.local_triggers)->addr;
@ -69,13 +68,25 @@ int orte_gpr_replica_deliver_notify_msg(orte_gpr_notify_message_t *msg)
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }
/* get here if the trigger id indicated that this was NOT /* get here if this wasn't a trigger message. Only other allowed message type
* intended for a trigger callback - i.e., the message should * is a subscription message - if that isn't the case, then we have corrupt
* data, so flag it and return
*/
if (ORTE_GPR_SUBSCRIPTION_MSG != msg->msg_type) {
ORTE_ERROR_LOG(ORTE_ERR_GPR_DATA_CORRUPT);
return ORTE_ERR_GPR_DATA_CORRUPT;
}
/* get here if we have a subscription message - i.e., the message should
* be broken into its component parts and delivered separately * be broken into its component parts and delivered separately
* to the indicated subscribers * to the indicated subscribers
*/ */
data = (orte_gpr_notify_data_t**)(msg->data)->addr; data = (orte_gpr_notify_data_t**)(msg->data)->addr;
for (i=0; i < msg->cnt; i++) { for (i=0, n=0; n < msg->cnt &&
i < (msg->data)->size; i++) {
if (NULL != data[i]) {
n++;
if (ORTE_GPR_SUBSCRIPTION_ID_MAX != data[i]->id || NULL != data[i]->target) {
/* for each datagram in the message, we need to lookup /* for each datagram in the message, we need to lookup
* the associated subscription (could be specified by name or id) to find the correct * the associated subscription (could be specified by name or id) to find the correct
* callback function. Name specifications are given precedence over id. * callback function. Name specifications are given precedence over id.
@ -88,10 +99,15 @@ int orte_gpr_replica_deliver_notify_msg(orte_gpr_notify_message_t *msg)
j < (orte_gpr_replica_globals.local_subscriptions)->size; j++) { j < (orte_gpr_replica_globals.local_subscriptions)->size; j++) {
if (NULL != local_subs[j]) { if (NULL != local_subs[j]) {
k++; k++;
if ((NULL != local_subs[j]->name && if (NULL != data[i]->target) {
NULL != data[i]->target && /* if target name provided, must use it */
0 == strcmp(data[i]->target, local_subs[j]->name)) || if (NULL != local_subs[j]->name &&
(data[i]->id == local_subs[j]->id)) { 0 == strcmp(data[i]->target, local_subs[j]->name)) {
sub = local_subs[j];
processed = true;
}
} else if (data[i]->id == local_subs[j]->id) {
/* otherwise, see if id's match */
sub = local_subs[j]; sub = local_subs[j];
processed = true; processed = true;
} }
@ -115,6 +131,8 @@ int orte_gpr_replica_deliver_notify_msg(orte_gpr_notify_message_t *msg)
} }
} }
} }
}
}
/* the calling program will release the message object */ /* the calling program will release the message object */
return ORTE_SUCCESS; return ORTE_SUCCESS;

Просмотреть файл

@ -98,7 +98,7 @@ int orte_gpr_replica_dump_segments(char *segment, int output_id)
return rc; return rc;
} }
int orte_gpr_replica_dump_triggers(int output_id) int orte_gpr_replica_dump_triggers(orte_gpr_trigger_id_t start, int output_id)
{ {
orte_buffer_t *buffer; orte_buffer_t *buffer;
int rc; int rc;
@ -116,7 +116,7 @@ int orte_gpr_replica_dump_triggers(int output_id)
return ORTE_ERR_OUT_OF_RESOURCE; return ORTE_ERR_OUT_OF_RESOURCE;
} }
if (ORTE_SUCCESS != (rc = orte_gpr_replica_dump_triggers_fn(buffer))) { if (ORTE_SUCCESS != (rc = orte_gpr_replica_dump_triggers_fn(buffer, start))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
} }
@ -130,7 +130,7 @@ int orte_gpr_replica_dump_triggers(int output_id)
return rc; return rc;
} }
int orte_gpr_replica_dump_subscriptions(int output_id) int orte_gpr_replica_dump_subscriptions(orte_gpr_subscription_id_t start, int output_id)
{ {
orte_buffer_t *buffer; orte_buffer_t *buffer;
int rc; int rc;
@ -143,7 +143,7 @@ int orte_gpr_replica_dump_subscriptions(int output_id)
return ORTE_ERR_OUT_OF_RESOURCE; return ORTE_ERR_OUT_OF_RESOURCE;
} }
if (ORTE_SUCCESS != (rc = orte_gpr_replica_dump_subscriptions_fn(buffer))) { if (ORTE_SUCCESS != (rc = orte_gpr_replica_dump_subscriptions_fn(buffer, start))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
} }
@ -157,6 +157,133 @@ int orte_gpr_replica_dump_subscriptions(int output_id)
return rc; return rc;
} }
/**
 * Dump one trigger held by the replica to the given output stream.
 *
 * The trigger is selected by name when @p name is non-NULL, otherwise by
 * comparing @p id against each trigger's index. The replica mutex is held
 * for the duration of the call.
 *
 * @param name      Name of the trigger to dump, or NULL to select by id.
 * @param id        Trigger id, used only when @p name is NULL.
 * @param output_id Output stream passed to orte_gpr_base_print_dump().
 *
 * @return ORTE_SUCCESS on success, ORTE_ERR_NOT_FOUND if no trigger
 *         matches, otherwise the error code from the dump/print helper.
 */
int orte_gpr_replica_dump_a_trigger(
    char *name,
    orte_gpr_trigger_id_t id,
    int output_id)
{
    orte_buffer_t buffer;
    orte_gpr_replica_trigger_t **trigs;
    orte_gpr_replica_trigger_t *match = NULL;
    size_t i, j;
    int rc;

    OPAL_THREAD_LOCK(&orte_gpr_replica_globals.mutex);

    OBJ_CONSTRUCT(&buffer, orte_buffer_t);

    /* walk the sparse pointer array; j counts the live entries seen so
     * the scan can stop once every registered trigger has been examined
     */
    trigs = (orte_gpr_replica_trigger_t**)(orte_gpr_replica.triggers)->addr;
    for (i=0, j=0; j < orte_gpr_replica.num_trigs &&
                   i < (orte_gpr_replica.triggers)->size; i++) {
        if (NULL == trigs[i]) {
            continue;
        }
        j++;
        if (NULL == name) {
            /* select by id */
            if (id == trigs[i]->index) {
                match = trigs[i];
                break;
            }
        } else if (NULL != trigs[i]->name &&
                   0 == strcmp(name, trigs[i]->name)) {
            /* select by name - entries without a name can never match
             * and must not be handed to strcmp
             */
            match = trigs[i];
            break;
        }
    }

    if (NULL == match) {
        rc = ORTE_ERR_NOT_FOUND;
        ORTE_ERROR_LOG(rc);
    } else if (ORTE_SUCCESS != (rc = orte_gpr_replica_dump_trigger(&buffer, match))) {
        ORTE_ERROR_LOG(rc);
    } else if (ORTE_SUCCESS != (rc = orte_gpr_base_print_dump(&buffer, output_id))) {
        /* report print failures instead of silently dropping them */
        ORTE_ERROR_LOG(rc);
    }

    OBJ_DESTRUCT(&buffer);
    OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex);
    return rc;
}
/**
 * Dump one subscription held by the replica to the given output stream.
 *
 * The subscription is selected by name when @p name is non-NULL, otherwise
 * by comparing @p id against each subscription's index. The replica mutex
 * is held for the duration of the call.
 *
 * @param name      Name of the subscription to dump, or NULL to select by id.
 * @param id        Subscription id, used only when @p name is NULL.
 * @param output_id Output stream passed to orte_gpr_base_print_dump().
 *
 * @return ORTE_SUCCESS on success, ORTE_ERR_NOT_FOUND if no subscription
 *         matches, otherwise the error code from the dump/print helper.
 */
int orte_gpr_replica_dump_a_subscription(char *name,
                                         orte_gpr_subscription_id_t id,
                                         int output_id)
{
    orte_buffer_t buffer;
    orte_gpr_replica_subscription_t **subs;
    orte_gpr_replica_subscription_t *match = NULL;
    size_t i, j;
    int rc;

    OPAL_THREAD_LOCK(&orte_gpr_replica_globals.mutex);

    OBJ_CONSTRUCT(&buffer, orte_buffer_t);

    /* walk the sparse pointer array; j counts the live entries seen so
     * the scan can stop once every registered subscription has been examined
     */
    subs = (orte_gpr_replica_subscription_t**)(orte_gpr_replica.subscriptions)->addr;
    for (i=0, j=0; j < orte_gpr_replica.num_subs &&
                   i < (orte_gpr_replica.subscriptions)->size; i++) {
        if (NULL == subs[i]) {
            continue;
        }
        j++;
        if (NULL == name) {
            /* select by id */
            if (id == subs[i]->index) {
                match = subs[i];
                break;
            }
        } else if (NULL != subs[i]->name &&
                   0 == strcmp(name, subs[i]->name)) {
            /* select by name - entries without a name can never match
             * and must not be handed to strcmp
             */
            match = subs[i];
            break;
        }
    }

    if (NULL == match) {
        rc = ORTE_ERR_NOT_FOUND;
        ORTE_ERROR_LOG(rc);
    } else if (ORTE_SUCCESS != (rc = orte_gpr_replica_dump_subscription(&buffer, match))) {
        ORTE_ERROR_LOG(rc);
    } else if (ORTE_SUCCESS != (rc = orte_gpr_base_print_dump(&buffer, output_id))) {
        /* report print failures instead of silently dropping them */
        ORTE_ERROR_LOG(rc);
    }

    OBJ_DESTRUCT(&buffer);
    OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex);
    return rc;
}
int orte_gpr_replica_dump_callbacks(int output_id) int orte_gpr_replica_dump_callbacks(int output_id)
{ {
orte_buffer_t *buffer; orte_buffer_t *buffer;

Просмотреть файл

@ -53,6 +53,11 @@ int orte_gpr_replica_dump_local_triggers(int output_id)
} else { } else {
opal_output(output_id, "\ttrigger name: %s", trigs[j]->name); opal_output(output_id, "\ttrigger name: %s", trigs[j]->name);
} }
if (NULL == trigs[j]->callback) {
opal_output(output_id, "\tNULL callback");
} else {
opal_output(output_id, "\tCallback %0x", trigs[j]->callback);
}
} }
} }
return ORTE_SUCCESS; return ORTE_SUCCESS;
@ -78,7 +83,11 @@ int orte_gpr_replica_dump_local_subscriptions(int output_id)
} else { } else {
opal_output(output_id, "\tsubscription name: %s", subs[j]->name); opal_output(output_id, "\tsubscription name: %s", subs[j]->name);
} }
} if (NULL == subs[j]->callback) {
opal_output(output_id, "\tNULL callback");
} else {
opal_output(output_id, "\tCallback %0x", subs[j]->callback);
} }
} }
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }

Просмотреть файл

@ -25,10 +25,10 @@
#include "orte_config.h" #include "orte_config.h"
#include "dps/dps.h" #include "orte/dps/dps.h"
#include "mca/ns/ns.h" #include "orte/mca/ns/ns.h"
#include "mca/errmgr/errmgr.h" #include "orte/mca/errmgr/errmgr.h"
#include "gpr_replica_api.h" #include "gpr_replica_api.h"
@ -99,11 +99,31 @@ orte_gpr_replica_subscribe(size_t num_subs,
int orte_gpr_replica_unsubscribe(orte_gpr_subscription_id_t sub_number) int orte_gpr_replica_unsubscribe(orte_gpr_subscription_id_t sub_number)
{ {
orte_gpr_replica_local_subscriber_t **subs;
size_t i, j;
int rc; int rc;
OPAL_THREAD_LOCK(&orte_gpr_replica_globals.mutex); OPAL_THREAD_LOCK(&orte_gpr_replica_globals.mutex);
rc = orte_gpr_replica_remove_subscription(NULL, sub_number); if (ORTE_SUCCESS != (rc = orte_gpr_replica_remove_subscription(NULL, sub_number))) {
ORTE_ERROR_LOG(rc);
}
if (ORTE_SUCCESS == rc) {
/* find and remove it from the local subscription tracking system */
subs = (orte_gpr_replica_local_subscriber_t**)(orte_gpr_replica_globals.local_subscriptions)->addr;
for (i=0, j=0; j < orte_gpr_replica_globals.num_local_subs &&
i < (orte_gpr_replica_globals.local_subscriptions)->size; i++) {
if (NULL != subs[i]) {
j++;
if (sub_number == subs[i]->id) {
if (ORTE_SUCCESS != (rc = orte_gpr_replica_remove_local_subscription(subs[i]))) {
ORTE_ERROR_LOG(rc);
}
}
}
}
}
OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex); OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex);
@ -113,12 +133,30 @@ int orte_gpr_replica_unsubscribe(orte_gpr_subscription_id_t sub_number)
int orte_gpr_replica_cancel_trigger(orte_gpr_trigger_id_t trig) int orte_gpr_replica_cancel_trigger(orte_gpr_trigger_id_t trig)
{ {
orte_gpr_replica_local_trigger_t **trigs;
size_t i, j;
int rc; int rc;
OPAL_THREAD_LOCK(&orte_gpr_replica_globals.mutex); OPAL_THREAD_LOCK(&orte_gpr_replica_globals.mutex);
rc = orte_gpr_replica_remove_trigger(NULL, trig); rc = orte_gpr_replica_remove_trigger(NULL, trig);
if (ORTE_SUCCESS == rc) {
/* find and remove it from the local trigger tracking system */
trigs = (orte_gpr_replica_local_trigger_t**)(orte_gpr_replica_globals.local_triggers)->addr;
for (i=0, j=0; j < orte_gpr_replica_globals.num_local_trigs &&
i < (orte_gpr_replica_globals.local_triggers)->size; i++) {
if (NULL != trigs[i]) {
j++;
if (trig == trigs[i]->id) {
if (ORTE_SUCCESS != (rc = orte_gpr_replica_remove_local_trigger(trigs[i]))) {
ORTE_ERROR_LOG(rc);
}
}
}
}
}
OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex); OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex);
return rc; return rc;

Просмотреть файл

@ -218,7 +218,7 @@ int orte_gpr_replica_process_command_buffer(orte_buffer_t *input_buffer,
opal_output(0, "\tdump triggers cmd"); opal_output(0, "\tdump triggers cmd");
} }
if (ORTE_SUCCESS != (ret = orte_gpr_replica_recv_dump_triggers_cmd(answer))) { if (ORTE_SUCCESS != (ret = orte_gpr_replica_recv_dump_triggers_cmd(input_buffer, answer))) {
ORTE_ERROR_LOG(ret); ORTE_ERROR_LOG(ret);
goto RETURN_ERROR; goto RETURN_ERROR;
} }
@ -232,7 +232,7 @@ int orte_gpr_replica_process_command_buffer(orte_buffer_t *input_buffer,
opal_output(0, "\tdump subscriptions cmd"); opal_output(0, "\tdump subscriptions cmd");
} }
if (ORTE_SUCCESS != (ret = orte_gpr_replica_recv_dump_subscriptions_cmd(answer))) { if (ORTE_SUCCESS != (ret = orte_gpr_replica_recv_dump_subscriptions_cmd(input_buffer, answer))) {
ORTE_ERROR_LOG(ret); ORTE_ERROR_LOG(ret);
goto RETURN_ERROR; goto RETURN_ERROR;
} }
@ -240,6 +240,32 @@ int orte_gpr_replica_process_command_buffer(orte_buffer_t *input_buffer,
case ORTE_GPR_DUMP_A_TRIGGER_CMD: /***** DUMP *****/
if (orte_gpr_replica_globals.debug) {
opal_output(0, "\tdump a trigger cmd");
}
if (ORTE_SUCCESS != (ret = orte_gpr_replica_recv_dump_a_trigger_cmd(input_buffer, answer))) {
ORTE_ERROR_LOG(ret);
goto RETURN_ERROR;
}
break;
case ORTE_GPR_DUMP_A_SUBSCRIPTION_CMD: /***** DUMP *****/
if (orte_gpr_replica_globals.debug) {
opal_output(0, "\tdump a subscription cmd");
}
if (ORTE_SUCCESS != (ret = orte_gpr_replica_recv_dump_a_subscription_cmd(input_buffer, answer))) {
ORTE_ERROR_LOG(ret);
goto RETURN_ERROR;
}
break;
case ORTE_GPR_DUMP_CALLBACKS_CMD: /***** DUMP *****/ case ORTE_GPR_DUMP_CALLBACKS_CMD: /***** DUMP *****/
if (orte_gpr_replica_globals.debug) { if (orte_gpr_replica_globals.debug) {

Просмотреть файл

@ -111,9 +111,17 @@ int orte_gpr_replica_recv_dump_all_cmd(orte_buffer_t *answer);
int orte_gpr_replica_recv_dump_segments_cmd(orte_buffer_t *input_buffer, orte_buffer_t *answer); int orte_gpr_replica_recv_dump_segments_cmd(orte_buffer_t *input_buffer, orte_buffer_t *answer);
int orte_gpr_replica_recv_dump_triggers_cmd(orte_buffer_t *answer); int orte_gpr_replica_recv_dump_triggers_cmd(orte_buffer_t *input_buffer,
orte_buffer_t *answer);
int orte_gpr_replica_recv_dump_subscriptions_cmd(orte_buffer_t *answer); int orte_gpr_replica_recv_dump_subscriptions_cmd(orte_buffer_t *input_buffer,
orte_buffer_t *answer);
int orte_gpr_replica_recv_dump_a_trigger_cmd(orte_buffer_t *input_buffer,
orte_buffer_t *answer);
int orte_gpr_replica_recv_dump_a_subscription_cmd(orte_buffer_t *input_buffer,
orte_buffer_t *answer);
int orte_gpr_replica_recv_dump_callbacks_cmd(orte_buffer_t *answer); int orte_gpr_replica_recv_dump_callbacks_cmd(orte_buffer_t *answer);

Просмотреть файл

@ -74,9 +74,12 @@ int orte_gpr_replica_recv_dump_segments_cmd(orte_buffer_t *input_buffer, orte_bu
return rc; return rc;
} }
int orte_gpr_replica_recv_dump_triggers_cmd(orte_buffer_t *answer) int orte_gpr_replica_recv_dump_triggers_cmd(orte_buffer_t *input_buffer,
orte_buffer_t *answer)
{ {
orte_gpr_cmd_flag_t command=ORTE_GPR_DUMP_TRIGGERS_CMD; orte_gpr_cmd_flag_t command=ORTE_GPR_DUMP_TRIGGERS_CMD;
orte_gpr_trigger_id_t start;
size_t n;
int rc; int rc;
if (ORTE_SUCCESS != (rc = orte_dps.pack(answer, &command, 1, ORTE_GPR_CMD))) { if (ORTE_SUCCESS != (rc = orte_dps.pack(answer, &command, 1, ORTE_GPR_CMD))) {
@ -84,7 +87,13 @@ int orte_gpr_replica_recv_dump_triggers_cmd(orte_buffer_t *answer)
return rc; return rc;
} }
rc = orte_gpr_replica_dump_triggers_fn(answer); n=1;
if (ORTE_SUCCESS != (rc = orte_dps.unpack(input_buffer, &start, &n, ORTE_GPR_TRIGGER_ID))) {
ORTE_ERROR_LOG(rc);
return rc;
}
rc = orte_gpr_replica_dump_triggers_fn(answer, start);
if (ORTE_SUCCESS != rc) { if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
@ -92,9 +101,12 @@ int orte_gpr_replica_recv_dump_triggers_cmd(orte_buffer_t *answer)
return rc; return rc;
} }
int orte_gpr_replica_recv_dump_subscriptions_cmd(orte_buffer_t *answer) int orte_gpr_replica_recv_dump_subscriptions_cmd(orte_buffer_t *input_buffer,
orte_buffer_t *answer)
{ {
orte_gpr_cmd_flag_t command=ORTE_GPR_DUMP_SUBSCRIPTIONS_CMD; orte_gpr_cmd_flag_t command=ORTE_GPR_DUMP_SUBSCRIPTIONS_CMD;
orte_gpr_subscription_id_t start;
size_t n;
int rc; int rc;
if (ORTE_SUCCESS != (rc = orte_dps.pack(answer, &command, 1, ORTE_GPR_CMD))) { if (ORTE_SUCCESS != (rc = orte_dps.pack(answer, &command, 1, ORTE_GPR_CMD))) {
@ -102,7 +114,13 @@ int orte_gpr_replica_recv_dump_subscriptions_cmd(orte_buffer_t *answer)
return rc; return rc;
} }
rc = orte_gpr_replica_dump_subscriptions_fn(answer); n=1;
if (ORTE_SUCCESS != (rc = orte_dps.unpack(input_buffer, &start, &n, ORTE_GPR_SUBSCRIPTION_ID))) {
ORTE_ERROR_LOG(rc);
return rc;
}
rc = orte_gpr_replica_dump_subscriptions_fn(answer, start);
if (ORTE_SUCCESS != rc) { if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
@ -110,6 +128,135 @@ int orte_gpr_replica_recv_dump_subscriptions_cmd(orte_buffer_t *answer)
return rc; return rc;
} }
/**
 * Service a "dump a trigger" request received by the replica.
 *
 * Packs ORTE_GPR_DUMP_A_TRIGGER_CMD into @p answer, unpacks the requested
 * trigger name and id from @p input_buffer, then appends the dump of the
 * matching trigger to @p answer. A NULL unpacked name selects lookup by id
 * (compared against each trigger's index); otherwise lookup is by name.
 *
 * @param input_buffer Request buffer positioned at the name/id payload.
 * @param answer       Reply buffer that receives the command flag and dump.
 *
 * @return ORTE_SUCCESS on success, ORTE_ERR_NOT_FOUND if no trigger
 *         matches, otherwise the error code from pack/unpack/dump.
 */
int orte_gpr_replica_recv_dump_a_trigger_cmd(orte_buffer_t *input_buffer,
                                             orte_buffer_t *answer)
{
    orte_gpr_cmd_flag_t command=ORTE_GPR_DUMP_A_TRIGGER_CMD;
    char *name = NULL;
    orte_gpr_trigger_id_t id;
    orte_gpr_replica_trigger_t **trigs;
    orte_gpr_replica_trigger_t *match = NULL;
    size_t n, i, j;
    int rc;

    if (ORTE_SUCCESS != (rc = orte_dps.pack(answer, &command, 1, ORTE_GPR_CMD))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    n=1;
    if (ORTE_SUCCESS != (rc = orte_dps.unpack(input_buffer, &name, &n, ORTE_STRING))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    n=1;
    if (ORTE_SUCCESS != (rc = orte_dps.unpack(input_buffer, &id, &n, ORTE_GPR_TRIGGER_ID))) {
        ORTE_ERROR_LOG(rc);
        goto CLEANUP;
    }

    /* walk the sparse pointer array; j counts the live entries seen so
     * the scan can stop once every registered trigger has been examined
     */
    trigs = (orte_gpr_replica_trigger_t**)(orte_gpr_replica.triggers)->addr;
    for (i=0, j=0; j < orte_gpr_replica.num_trigs &&
                   i < (orte_gpr_replica.triggers)->size; i++) {
        if (NULL == trigs[i]) {
            continue;
        }
        j++;
        if (NULL == name) {
            /* select by id */
            if (id == trigs[i]->index) {
                match = trigs[i];
                break;
            }
        } else if (NULL != trigs[i]->name &&
                   0 == strcmp(name, trigs[i]->name)) {
            /* select by name - entries without a name can never match
             * and must not be handed to strcmp
             */
            match = trigs[i];
            break;
        }
    }

    if (NULL == match) {
        /* report the miss to the caller for both lookup modes instead of
         * returning the success code left over from the last unpack
         */
        rc = ORTE_ERR_NOT_FOUND;
        ORTE_ERROR_LOG(rc);
    } else if (ORTE_SUCCESS != (rc = orte_gpr_replica_dump_trigger(answer, match))) {
        ORTE_ERROR_LOG(rc);
    }

CLEANUP:
    /* NOTE(review): assumes the unpacked string is heap-allocated and
     * owned by this function - confirm against the DPS unpack contract.
     */
    free(name);
    return rc;
}
/**
 * Service a "dump a subscription" request received by the replica.
 *
 * Packs ORTE_GPR_DUMP_A_SUBSCRIPTION_CMD into @p answer, unpacks the
 * requested subscription name and id from @p input_buffer, then appends the
 * dump of the matching subscription to @p answer. A NULL unpacked name
 * selects lookup by id (compared against each subscription's index);
 * otherwise lookup is by name.
 *
 * @param input_buffer Request buffer positioned at the name/id payload.
 * @param answer       Reply buffer that receives the command flag and dump.
 *
 * @return ORTE_SUCCESS on success, ORTE_ERR_NOT_FOUND if no subscription
 *         matches, otherwise the error code from pack/unpack/dump.
 */
int orte_gpr_replica_recv_dump_a_subscription_cmd(orte_buffer_t *input_buffer,
                                                  orte_buffer_t *answer)
{
    orte_gpr_cmd_flag_t command=ORTE_GPR_DUMP_A_SUBSCRIPTION_CMD;
    orte_gpr_replica_subscription_t **subs;
    orte_gpr_replica_subscription_t *match = NULL;
    orte_gpr_subscription_id_t id;
    size_t n, i, j;
    char *name = NULL;
    int rc;

    if (ORTE_SUCCESS != (rc = orte_dps.pack(answer, &command, 1, ORTE_GPR_CMD))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    n=1;
    if (ORTE_SUCCESS != (rc = orte_dps.unpack(input_buffer, &name, &n, ORTE_STRING))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    n=1;
    if (ORTE_SUCCESS != (rc = orte_dps.unpack(input_buffer, &id, &n, ORTE_GPR_SUBSCRIPTION_ID))) {
        ORTE_ERROR_LOG(rc);
        goto CLEANUP;
    }

    /* walk the sparse pointer array; j counts the live entries seen so
     * the scan can stop once every registered subscription has been examined
     */
    subs = (orte_gpr_replica_subscription_t**)(orte_gpr_replica.subscriptions)->addr;
    for (i=0, j=0; j < orte_gpr_replica.num_subs &&
                   i < (orte_gpr_replica.subscriptions)->size; i++) {
        if (NULL == subs[i]) {
            continue;
        }
        j++;
        if (NULL == name) {
            /* select by id */
            if (id == subs[i]->index) {
                match = subs[i];
                break;
            }
        } else if (NULL != subs[i]->name &&
                   0 == strcmp(name, subs[i]->name)) {
            /* select by name - entries without a name can never match
             * and must not be handed to strcmp
             */
            match = subs[i];
            break;
        }
    }

    if (NULL == match) {
        /* report the miss to the caller for both lookup modes instead of
         * returning the success code left over from the last unpack
         */
        rc = ORTE_ERR_NOT_FOUND;
        ORTE_ERROR_LOG(rc);
    } else if (ORTE_SUCCESS != (rc = orte_gpr_replica_dump_subscription(answer, match))) {
        ORTE_ERROR_LOG(rc);
    }

CLEANUP:
    /* the unpacked name is released on every exit path, not only when a
     * named match is found
     */
    free(name);
    return rc;
}
int orte_gpr_replica_recv_dump_callbacks_cmd(orte_buffer_t *answer) int orte_gpr_replica_recv_dump_callbacks_cmd(orte_buffer_t *answer)
{ {
orte_gpr_cmd_flag_t command=ORTE_GPR_DUMP_CALLBACKS_CMD; orte_gpr_cmd_flag_t command=ORTE_GPR_DUMP_CALLBACKS_CMD;

Просмотреть файл

@ -43,11 +43,6 @@ static void orte_gpr_replica_dump_load_string(orte_buffer_t *buffer, char **tmp)
void orte_gpr_replica_dump_itagval_value(orte_buffer_t *buffer, void orte_gpr_replica_dump_itagval_value(orte_buffer_t *buffer,
orte_gpr_replica_itagval_t *iptr); orte_gpr_replica_itagval_t *iptr);
static void orte_gpr_replica_dump_trigger(orte_buffer_t *buffer, size_t cnt,
orte_gpr_replica_trigger_t *trig);
static void orte_gpr_replica_dump_subscription(orte_buffer_t *buffer,
orte_gpr_replica_subscription_t *sub);
int orte_gpr_replica_dump_all_fn(orte_buffer_t *buffer) int orte_gpr_replica_dump_all_fn(orte_buffer_t *buffer)
{ {
@ -58,11 +53,11 @@ int orte_gpr_replica_dump_all_fn(orte_buffer_t *buffer)
sprintf(tmp_out, "\n\n\nDUMP OF GENERAL PURPOSE REGISTRY"); sprintf(tmp_out, "\n\n\nDUMP OF GENERAL PURPOSE REGISTRY");
orte_gpr_replica_dump_load_string(buffer, &tmp); orte_gpr_replica_dump_load_string(buffer, &tmp);
if (ORTE_SUCCESS != (rc = orte_gpr_replica_dump_triggers_fn(buffer))) { if (ORTE_SUCCESS != (rc = orte_gpr_replica_dump_triggers_fn(buffer, 0))) {
return rc; return rc;
} }
if (ORTE_SUCCESS != (rc = orte_gpr_replica_dump_subscriptions_fn(buffer))) { if (ORTE_SUCCESS != (rc = orte_gpr_replica_dump_subscriptions_fn(buffer, 0))) {
return rc; return rc;
} }
@ -339,11 +334,13 @@ int orte_gpr_replica_dump_callbacks_fn(orte_buffer_t *buffer)
} }
int orte_gpr_replica_dump_triggers_fn(orte_buffer_t *buffer) int orte_gpr_replica_dump_triggers_fn(orte_buffer_t *buffer,
orte_gpr_trigger_id_t start)
{ {
orte_gpr_replica_trigger_t **trig; orte_gpr_replica_trigger_t **trig;
char tmp_out[100], *tmp; char tmp_out[100], *tmp;
size_t j, k; size_t j, k, m;
int rc;
tmp = tmp_out; tmp = tmp_out;
sprintf(tmp_out, "\nDUMP OF GPR TRIGGERS\n"); sprintf(tmp_out, "\nDUMP OF GPR TRIGGERS\n");
@ -354,10 +351,21 @@ int orte_gpr_replica_dump_triggers_fn(orte_buffer_t *buffer)
orte_gpr_replica_dump_load_string(buffer, &tmp); orte_gpr_replica_dump_load_string(buffer, &tmp);
/* dump the trigger info for the registry */ /* dump the trigger info for the registry */
if (0 == start) { /* dump the whole thing */
m = 0;
} else {
m = orte_gpr_replica.num_trigs - start;
}
for (j=0, k=0; k < orte_gpr_replica.num_trigs && for (j=0, k=0; k < orte_gpr_replica.num_trigs &&
j < (orte_gpr_replica.triggers)->size; j++) { j < (orte_gpr_replica.triggers)->size; j++) {
if (NULL != trig[j]) { if (NULL != trig[j]) {
orte_gpr_replica_dump_trigger(buffer, k, trig[j]); if (k >= m) {
if (ORTE_SUCCESS != (rc = orte_gpr_replica_dump_trigger(buffer, trig[j]))) {
ORTE_ERROR_LOG(rc);
return rc;
}
}
k++; k++;
} }
} }
@ -365,7 +373,7 @@ int orte_gpr_replica_dump_triggers_fn(orte_buffer_t *buffer)
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }
static void orte_gpr_replica_dump_trigger(orte_buffer_t *buffer, size_t cnt, int orte_gpr_replica_dump_trigger(orte_buffer_t *buffer,
orte_gpr_replica_trigger_t *trig) orte_gpr_replica_trigger_t *trig)
{ {
char *tmp_out, *token; char *tmp_out, *token;
@ -377,7 +385,7 @@ static void orte_gpr_replica_dump_trigger(orte_buffer_t *buffer, size_t cnt,
tmp_out = (char*)malloc(1000); tmp_out = (char*)malloc(1000);
if (NULL == tmp_out) { if (NULL == tmp_out) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return; return ORTE_ERR_OUT_OF_RESOURCE;
} }
sprintf(tmp_out, "\nData for trigger %lu", (unsigned long) trig->index); sprintf(tmp_out, "\nData for trigger %lu", (unsigned long) trig->index);
@ -501,14 +509,16 @@ static void orte_gpr_replica_dump_trigger(orte_buffer_t *buffer, size_t cnt,
} }
free(tmp_out); free(tmp_out);
return; return ORTE_SUCCESS;
} }
int orte_gpr_replica_dump_subscriptions_fn(orte_buffer_t *buffer) int orte_gpr_replica_dump_subscriptions_fn(orte_buffer_t *buffer,
orte_gpr_subscription_id_t start)
{ {
char *tmp_out, *tmp; char *tmp_out, *tmp;
size_t i, m; size_t i, m, n;
orte_gpr_replica_subscription_t **subs; orte_gpr_replica_subscription_t **subs;
int rc;
tmp_out = (char*)malloc(1000); tmp_out = (char*)malloc(1000);
if (NULL == tmp_out) { if (NULL == tmp_out) {
@ -525,18 +535,29 @@ int orte_gpr_replica_dump_subscriptions_fn(orte_buffer_t *buffer)
orte_gpr_replica_dump_load_string(buffer, &tmp); orte_gpr_replica_dump_load_string(buffer, &tmp);
/* dump the subscription info for the registry */ /* dump the subscription info for the registry */
if (0 == start) { /* dump the whole thing */
n = 0;
} else {
n = orte_gpr_replica.num_subs - start;
}
for (i=0, m=0; m < orte_gpr_replica.num_subs && for (i=0, m=0; m < orte_gpr_replica.num_subs &&
i < (orte_gpr_replica.subscriptions)->size; i++) { i < (orte_gpr_replica.subscriptions)->size; i++) {
if (NULL != subs[i]) { if (NULL != subs[i]) {
if (m >= n) {
if (ORTE_SUCCESS != (rc = orte_gpr_replica_dump_subscription(buffer, subs[i]))) {
ORTE_ERROR_LOG(rc);
return rc;
}
}
m++; m++;
orte_gpr_replica_dump_subscription(buffer, subs[i]);
} }
} }
free(tmp_out); free(tmp_out);
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }
static void orte_gpr_replica_dump_subscription(orte_buffer_t *buffer, int orte_gpr_replica_dump_subscription(orte_buffer_t *buffer,
orte_gpr_replica_subscription_t *sub) orte_gpr_replica_subscription_t *sub)
{ {
char *tmp_out, *token, *tmp; char *tmp_out, *token, *tmp;
@ -547,17 +568,17 @@ static void orte_gpr_replica_dump_subscription(orte_buffer_t *buffer,
tmp_out = (char*)malloc(1000); tmp_out = (char*)malloc(1000);
if (NULL == tmp_out) { if (NULL == tmp_out) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return; return ORTE_ERR_OUT_OF_RESOURCE;
} }
tmp = tmp_out; tmp = tmp_out;
if (NULL == sub->name) { if (NULL == sub->name) {
sprintf(tmp, "\nSubscription %lu: UNNAMED", sprintf(tmp, "\nSubscription %lu: UNNAMED idtag %lu",
(unsigned long) sub->index); (unsigned long) sub->index, (unsigned long) sub->idtag);
} else { } else {
sprintf(tmp, "\nSubscription %lu name %s", sprintf(tmp, "\nSubscription %lu: name %s idtag %lu",
(unsigned long) sub->index, (unsigned long) sub->index,
sub->name); sub->name, (unsigned long) sub->idtag);
} }
orte_gpr_replica_dump_load_string(buffer, &tmp); orte_gpr_replica_dump_load_string(buffer, &tmp);
@ -727,7 +748,7 @@ static void orte_gpr_replica_dump_subscription(orte_buffer_t *buffer,
} /* for n */ } /* for n */
free(tmp_out); free(tmp_out);
return; return ORTE_SUCCESS;
} }
@ -757,8 +778,7 @@ void orte_gpr_replica_dump_itagval_value(orte_buffer_t *buffer,
break; break;
case ORTE_PID: case ORTE_PID:
sprintf(tmp, "\t\tData type: ORTE_PID\tValue: " ORTE_PID_T_PRINTF, sprintf(tmp, "\t\tData type: ORTE_PID\tValue: %lu", (unsigned long)iptr->value.pid);
iptr->value.pid);
break; break;
case ORTE_INT: case ORTE_INT:

Просмотреть файл

@ -126,11 +126,19 @@ int orte_gpr_replica_dump_segments_fn(orte_buffer_t *buffer, char *segment);
int orte_gpr_replica_dump_a_segment_fn(orte_buffer_t *buffer, orte_gpr_replica_segment_t *seg); int orte_gpr_replica_dump_a_segment_fn(orte_buffer_t *buffer, orte_gpr_replica_segment_t *seg);
int orte_gpr_replica_dump_triggers_fn(orte_buffer_t *buffer); int orte_gpr_replica_dump_triggers_fn(orte_buffer_t *buffer,
orte_gpr_trigger_id_t start);
int orte_gpr_replica_dump_subscriptions_fn(orte_buffer_t *buffer); int orte_gpr_replica_dump_subscriptions_fn(orte_buffer_t *buffer,
orte_gpr_subscription_id_t start);
int orte_gpr_replica_dump_callbacks_fn(orte_buffer_t *buffer); int orte_gpr_replica_dump_trigger(orte_buffer_t *buffer,
orte_gpr_replica_trigger_t *trig);
int orte_gpr_replica_dump_subscription(orte_buffer_t *buffer,
orte_gpr_replica_subscription_t *sub);
int orte_gpr_replica_dump_callbacks_fn(orte_buffer_t *buffer);
/* /*
* ********* INTERNAL UTILITY FUNCTIONS ********** * ********* INTERNAL UTILITY FUNCTIONS **********
@ -203,9 +211,9 @@ int orte_gpr_replica_enter_local_subscription(size_t cnt, orte_gpr_subscription_
int orte_gpr_replica_enter_local_trigger(size_t cnt, orte_gpr_trigger_t **trigs); int orte_gpr_replica_enter_local_trigger(size_t cnt, orte_gpr_trigger_t **trigs);
int orte_gpr_replica_remove_local_subscription(orte_gpr_subscription_id_t id); int orte_gpr_replica_remove_local_subscription(orte_gpr_replica_local_subscriber_t *sub);
int orte_gpr_proxy_remove_local_trigger(orte_gpr_trigger_id_t id); int orte_gpr_replica_remove_local_trigger(orte_gpr_replica_local_trigger_t *trig);
int orte_gpr_replica_record_action(orte_gpr_replica_segment_t *seg, int orte_gpr_replica_record_action(orte_gpr_replica_segment_t *seg,
orte_gpr_replica_container_t *cptr, orte_gpr_replica_container_t *cptr,
@ -247,18 +255,20 @@ int
orte_gpr_replica_remove_trigger(orte_process_name_t *requestor, orte_gpr_replica_remove_trigger(orte_process_name_t *requestor,
orte_gpr_trigger_id_t id); orte_gpr_trigger_id_t id);
int orte_gpr_replica_register_callback(orte_gpr_replica_trigger_t *trig, int orte_gpr_replica_register_callback(orte_gpr_replica_subscription_t *sub,
orte_gpr_replica_subscription_t *sub,
orte_gpr_value_t *value); orte_gpr_value_t *value);
int orte_gpr_replica_define_callback(orte_gpr_replica_callbacks_t **cbptr, int orte_gpr_replica_register_trigger_callback(orte_gpr_replica_trigger_t *trig);
int orte_gpr_replica_define_callback(orte_gpr_notify_msg_type_t msg_type,
orte_gpr_replica_callbacks_t **cbptr,
orte_process_name_t *recipient); orte_process_name_t *recipient);
int orte_gpr_replica_process_callbacks(void); int orte_gpr_replica_process_callbacks(void);
int orte_gpr_replica_purge_subscriptions(orte_process_name_t *proc); int orte_gpr_replica_purge_subscriptions(orte_process_name_t *proc);
int orte_gpr_replica_store_value_in_msg(orte_gpr_subscription_id_t id, int orte_gpr_replica_store_value_in_msg(orte_gpr_replica_requestor_t *req,
orte_gpr_notify_message_t *msg, orte_gpr_notify_message_t *msg,
size_t cnt, size_t cnt,
orte_gpr_value_t **values); orte_gpr_value_t **values);

Просмотреть файл

@ -40,7 +40,7 @@ int
orte_gpr_replica_enter_local_subscription(size_t cnt, orte_gpr_subscription_t **subscriptions) orte_gpr_replica_enter_local_subscription(size_t cnt, orte_gpr_subscription_t **subscriptions)
{ {
orte_gpr_replica_local_subscriber_t *sub; orte_gpr_replica_local_subscriber_t *sub;
size_t i, id; size_t i;
for (i=0; i < cnt; i++) { for (i=0; i < cnt; i++) {
sub = OBJ_NEW(orte_gpr_replica_local_subscriber_t); sub = OBJ_NEW(orte_gpr_replica_local_subscriber_t);
@ -53,7 +53,7 @@ orte_gpr_replica_enter_local_subscription(size_t cnt, orte_gpr_subscription_t **
} }
sub->callback = subscriptions[i]->cbfunc; sub->callback = subscriptions[i]->cbfunc;
sub->user_tag = subscriptions[i]->user_tag; sub->user_tag = subscriptions[i]->user_tag;
if (0 > orte_pointer_array_add(&id, orte_gpr_replica_globals.local_subscriptions, sub)) { if (0 > orte_pointer_array_add(&sub->index, orte_gpr_replica_globals.local_subscriptions, sub)) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE; return ORTE_ERR_OUT_OF_RESOURCE;
} }
@ -69,10 +69,45 @@ orte_gpr_replica_enter_local_subscription(size_t cnt, orte_gpr_subscription_t **
int int
orte_gpr_replica_enter_local_trigger(size_t cnt, orte_gpr_trigger_t **trigs) orte_gpr_replica_enter_local_trigger(size_t cnt, orte_gpr_trigger_t **trigs)
{ {
orte_gpr_replica_local_trigger_t *trig; orte_gpr_replica_local_trigger_t *trig, **tptr;
size_t i, id; size_t i, j, k;
for (i=0; i < cnt; i++) { for (i=0; i < cnt; i++) {
/* If the provided trigger has a name, see if it already is on
* the local trigger list. If so, then check to see if we
* already defined a return point for it and/or if this trigger
* doesn't - in either of those two cases, we ignore the
* trigger and just use the existing entry
*/
if (NULL != trigs[i]->name) {
tptr = (orte_gpr_replica_local_trigger_t**)(orte_gpr_replica_globals.local_triggers)->addr;
for (j=0, k=0; k < orte_gpr_replica_globals.num_local_trigs &&
j < (orte_gpr_replica_globals.local_triggers)->size; j++) {
if (NULL != tptr[j]) {
k++;
if (0 == strcmp(tptr[j]->name, trigs[i]->name)) {
/* same name - trigger is already on list */
if (NULL != tptr[j]->callback || NULL == trigs[i]->cbfunc) {
/* ignore these cases */
trig = tptr[j];
goto MOVEON;
}
/* reach here if either the prior trigger didn't provide
* a callback, and the new one provides one. In this
* case, we update the existing trigger callback and then
* move on
*/
tptr[j]->callback = trigs[i]->cbfunc;
trig = tptr[j];
goto MOVEON;
}
}
}
}
/* either the trigger doesn't have a name, OR it did, but it isn't
* already on the list - add it to the list now
*/
trig = OBJ_NEW(orte_gpr_replica_local_trigger_t); trig = OBJ_NEW(orte_gpr_replica_local_trigger_t);
if (NULL == trig) { if (NULL == trig) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
@ -94,35 +129,49 @@ orte_gpr_replica_enter_local_trigger(size_t cnt, orte_gpr_trigger_t **trigs)
} }
trig->callback = trigs[i]->cbfunc; trig->callback = trigs[i]->cbfunc;
trig->user_tag = trigs[i]->user_tag; trig->user_tag = trigs[i]->user_tag;
if (0 > orte_pointer_array_add(&id, orte_gpr_replica_globals.local_triggers, trig)) { if (0 > orte_pointer_array_add(&trig->index, orte_gpr_replica_globals.local_triggers, trig)) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE; return ORTE_ERR_OUT_OF_RESOURCE;
} }
trig->id = orte_gpr_replica_globals.num_local_trigs; trig->id = orte_gpr_replica_globals.num_local_trigs;
trigs[i]->id = trig->id;
(orte_gpr_replica_globals.num_local_trigs)++; (orte_gpr_replica_globals.num_local_trigs)++;
MOVEON:
trigs[i]->id = trig->id;
} }
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }
int orte_gpr_replica_remove_local_subscription(orte_gpr_subscription_id_t id) int orte_gpr_replica_remove_local_subscription(orte_gpr_replica_local_subscriber_t *sub)
{ {
if (NULL != (orte_gpr_replica_globals.local_subscriptions)->addr[id]) { size_t index;
OBJ_RELEASE((orte_gpr_replica_globals.local_subscriptions)->addr[id]);
orte_pointer_array_set_item(orte_gpr_replica_globals.local_subscriptions, (size_t)id, NULL); if (NULL == sub) {
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
return ORTE_ERR_BAD_PARAM;
} }
index = sub->index;
OBJ_RELEASE(sub);
orte_pointer_array_set_item(orte_gpr_replica_globals.local_subscriptions, index, NULL);
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }
int orte_gpr_proxy_remove_local_trigger(orte_gpr_trigger_id_t id) int orte_gpr_replica_remove_local_trigger(orte_gpr_replica_local_trigger_t *trig)
{ {
if (NULL != (orte_gpr_replica_globals.local_triggers)->addr[id]) { size_t index;
OBJ_RELEASE((orte_gpr_replica_globals.local_triggers)->addr[id]);
orte_pointer_array_set_item(orte_gpr_replica_globals.local_triggers, (size_t)id, NULL); if (NULL == trig) {
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
return ORTE_ERR_BAD_PARAM;
} }
index = trig->index;
OBJ_RELEASE(trig);
orte_pointer_array_set_item(orte_gpr_replica_globals.local_triggers, index, NULL);
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }

Просмотреть файл

@ -39,17 +39,21 @@
#include "mca/gpr/replica/communications/gpr_replica_comm.h" #include "mca/gpr/replica/communications/gpr_replica_comm.h"
#include "gpr_replica_fn.h" #include "gpr_replica_fn.h"
static int orte_gpr_replica_get_callback_data(orte_gpr_value_t ***values, size_t *num_vals,
orte_gpr_replica_subscription_t *sub);
static int orte_gpr_replica_store_value_in_trigger_msg(orte_gpr_replica_subscription_t *sub,
orte_gpr_notify_message_t *msg,
size_t cnt,
orte_gpr_value_t **values);
int orte_gpr_replica_process_callbacks(void) int orte_gpr_replica_process_callbacks(void)
{ {
orte_gpr_replica_callbacks_t *cb; orte_gpr_replica_callbacks_t *cb;
orte_gpr_notify_data_t **data;
orte_gpr_replica_trigger_t **trigs; orte_gpr_replica_trigger_t **trigs;
orte_gpr_replica_local_trigger_t **local_trigs;
orte_gpr_replica_subscription_t **subs; orte_gpr_replica_subscription_t **subs;
orte_gpr_replica_local_subscriber_t **local_subs;
orte_gpr_replica_requestor_t **reqs; orte_gpr_replica_requestor_t **reqs;
size_t i, j, k, m; size_t i, j, k, m;
bool processed;
int rc; int rc;
/* check and set flag indicating callbacks being processed */ /* check and set flag indicating callbacks being processed */
@ -59,80 +63,23 @@ int orte_gpr_replica_process_callbacks(void)
orte_gpr_replica.processing_callbacks = true; orte_gpr_replica.processing_callbacks = true;
while (NULL != (cb = (orte_gpr_replica_callbacks_t*)opal_list_remove_last(&orte_gpr_replica.callbacks))) { while (NULL != (cb = (orte_gpr_replica_callbacks_t*)opal_list_remove_last(&orte_gpr_replica.callbacks))) {
if (NULL == cb->requestor) { /* local callback */
/* each callback corresponds to a specific requestor /* each callback corresponds to a specific requestor
* The message in the callback consists of at least one (and can * The message in the callback consists of at least one (and can
* be more) "datagrams" intended for that requestor, each of which * be more) "datagrams" intended for that requestor, each of which
* is slated to be returned to a specific * is slated to be returned to a specific function on the requestor.
* function on the requestor. */
* if (NULL == cb->requestor) { /* local callback */
* Since this requestor is "local", we simply execute /* Since this requestor is "local", we simply execute
* the callbacks ourself. * the callbacks ourself.
*/ */
/* we first have to check the trigger id in the message. If that if (ORTE_SUCCESS != (rc = orte_gpr_replica_deliver_notify_msg(cb->message))) {
* field is set to a valid value (i.e., one other than ORTE_ERROR_LOG(rc);
* ORTE_GPR_TRIGGER_ID_MAX), then the message is intended to be
* sent as a single block to that trigger's callback function.
*/
if (ORTE_GPR_TRIGGER_ID_MAX > (cb->message)->id) {
/* use the local trigger callback */
local_trigs = (orte_gpr_replica_local_trigger_t**)
(orte_gpr_replica_globals.local_triggers)->addr;
for (i=0, j=0; j < orte_gpr_replica_globals.num_local_trigs &&
i < (orte_gpr_replica_globals.local_triggers)->size; i++) {
if (NULL != local_trigs[i]) {
j++;
if ((cb->message)->id == local_trigs[i]->id) {
OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex);
local_trigs[i]->callback(cb->message, local_trigs[i]->user_tag);
OPAL_THREAD_LOCK(&orte_gpr_replica_globals.mutex);
goto CLEANUP;
}
}
}
/* get here if the trigger could not be found */
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
goto CLEANUP;
}
/* get here if the trigger id indicated that this was NOT
* intended for a trigger callback - i.e., the message should
* be broken into its component parts and delivered separately
* to the indicated subscribers
*/
data = (orte_gpr_notify_data_t**)((cb->message)->data)->addr;
for (i=0; i < (cb->message)->cnt; i++) {
/* for each datagram in the message, we need to lookup
* the associated subscription id to find the correct
* callback function.
*/
local_subs = (orte_gpr_replica_local_subscriber_t**)
(orte_gpr_replica_globals.local_subscriptions)->addr;
processed = false;
for (j=0, k=0; !processed &&
k < orte_gpr_replica_globals.num_local_subs &&
j < (orte_gpr_replica_globals.local_subscriptions)->size; j++) {
if (NULL != local_subs[j]) {
k++;
if (data[i]->id == local_subs[j]->id) {
OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex);
local_subs[j]->callback(data[i], local_subs[j]->user_tag);
OPAL_THREAD_LOCK(&orte_gpr_replica_globals.mutex);
processed = true;
}
}
}
/* get here and not processed => not found */
if (!processed) {
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
}
} }
} else { /* remote request - send messages back */ } else { /* remote request - send messages back */
orte_gpr_replica_remote_notify(cb->requestor, cb->message); orte_gpr_replica_remote_notify(cb->requestor, cb->message);
} }
CLEANUP:
OBJ_RELEASE(cb); OBJ_RELEASE(cb);
} }
@ -192,16 +139,14 @@ CLEANUP:
int orte_gpr_replica_register_callback(orte_gpr_replica_trigger_t *trig, int orte_gpr_replica_register_callback(orte_gpr_replica_subscription_t *sub,
orte_gpr_replica_subscription_t *sub,
orte_gpr_value_t *value) orte_gpr_value_t *value)
{ {
orte_gpr_replica_callbacks_t *cb; orte_gpr_replica_callbacks_t *cb;
orte_gpr_replica_requestor_t **reqs; orte_gpr_replica_requestor_t **reqs;
size_t interim, cnt, num_tokens, num_keys; orte_gpr_value_t **values;
orte_gpr_value_t **vals, **values; size_t cnt;
orte_gpr_replica_ivalue_t **ivals; size_t i, j;
size_t i, j, k;
bool cleanup_reqd; bool cleanup_reqd;
int rc; int rc;
@ -210,76 +155,15 @@ int orte_gpr_replica_register_callback(orte_gpr_replica_trigger_t *trig,
* already been provided) so we have it ready to be added to * already been provided) so we have it ready to be added to
* the callback * the callback
*/ */
if (NULL != value) { /* no need to get data - already provided */
/* check to see if value provided - if so, we'll just use it */
if (NULL != value) {
values = &value; values = &value;
cnt = 1; cnt = 1;
cleanup_reqd = false; cleanup_reqd = false;
} else { } else {
/* value not provided - get the data off the registry. since a if (ORTE_SUCCESS != (rc = orte_gpr_replica_get_callback_data(&values, &cnt, sub))) {
* subscription can have multiple data sources specified, we
* have to loop through those sources, constructing an aggregated
* array of data values that we can work with in composing the
* final message
*/
ivals = (orte_gpr_replica_ivalue_t**)(sub->values)->addr;
cnt = 0;
values = NULL;
for (i=0, j=0; j < sub->num_values &&
i < (sub->values)->size; i++) {
if (NULL != ivals[i]) {
j++;
num_tokens = orte_value_array_get_size(&(ivals[i]->tokentags));
num_keys = orte_value_array_get_size(&(ivals[i]->keytags));
/* get the data for this description off the registry */
if (ORTE_SUCCESS != (rc = orte_gpr_replica_get_fn(ivals[i]->addr_mode,
ivals[i]->seg,
ORTE_VALUE_ARRAY_GET_BASE(&(ivals[i]->tokentags), orte_gpr_replica_itag_t),
num_tokens,
ORTE_VALUE_ARRAY_GET_BASE(&(ivals[i]->keytags), orte_gpr_replica_itag_t),
num_keys,
&interim, &vals))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
return rc; return rc;
} }
/* if we don't get any data back, just continue - don't
* try to add it to the values since that would cause a
* zero-byte malloc
*/
if (0 == interim) {
continue;
}
/* add these results to those we have already obtained */
if (0 == cnt) { /* first time through */
values = (orte_gpr_value_t**)malloc(interim *
sizeof(orte_gpr_value_t*));
if (NULL == values) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
} else {
/* reallocate values array */
values = (orte_gpr_value_t**)realloc(values,
(cnt+interim)*sizeof(orte_gpr_value_t*));
if (NULL == values) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
}
/* add data to end of array */
for (k=0; k < interim; k++) {
values[k+cnt] = vals[k];
}
/* release the array of pointers - the pointers themselves
* will remain "alive" in the values array to be released
* later
*/
free(vals);
/* update the count */
cnt += interim;
}
}
cleanup_reqd = true; cleanup_reqd = true;
} }
@ -300,42 +184,6 @@ int orte_gpr_replica_register_callback(orte_gpr_replica_trigger_t *trig,
* another datagram onto it to minimize communication costs. * another datagram onto it to minimize communication costs.
*/ */
/* first, we need to determine if the data in this message
* is to be sent back through the trigger callback function
* or not. if it is, then we set the callback's message
* to point at the correct trigger id for that requestor
* so the message goes to the correct place, and we go ahead
* and store the data in the message
*/
if (NULL != trig && NULL != trig->master) {
/* define the callback */
if (ORTE_SUCCESS != (rc = orte_gpr_replica_define_callback(&cb, (trig->master)->requestor))) {
ORTE_ERROR_LOG(rc);
goto CLEANUP;
}
/* transfer the trigger name, if available */
if (NULL != trig->name) {
(cb->message)->name = strdup(trig->name);
}
/* set the callback id to point to the trigger callback function */
(cb->message)->id = (trig->master)->idtag;
/* cycle through all the subscription's requestors and place
* the data on the message so that the trigger master can distribute
* it as required
*/
reqs = (orte_gpr_replica_requestor_t**)(sub->requestors)->addr;
for (i=0, j=0; j < sub->num_requestors &&
i < (sub->requestors)->size; i++) {
if (NULL != reqs[i]) {
j++;
if (ORTE_SUCCESS != (rc = orte_gpr_replica_store_value_in_msg(reqs[i]->idtag,
cb->message, cnt, values))) {
ORTE_ERROR_LOG(rc);
goto CLEANUP;
}
}
}
} else {
/* this data is intended to be sent to the individual /* this data is intended to be sent to the individual
* subscribers themselves. Cycle through the subscription's * subscribers themselves. Cycle through the subscription's
* requestors, define callbacks to them appropriately, * requestors, define callbacks to them appropriately,
@ -348,7 +196,8 @@ int orte_gpr_replica_register_callback(orte_gpr_replica_trigger_t *trig,
if (NULL != reqs[i]) { if (NULL != reqs[i]) {
j++; j++;
/* define the callback */ /* define the callback */
if (ORTE_SUCCESS != (rc = orte_gpr_replica_define_callback(&cb, reqs[i]->requestor))) { if (ORTE_SUCCESS != (rc = orte_gpr_replica_define_callback(ORTE_GPR_SUBSCRIPTION_MSG,
&cb, reqs[i]->requestor))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
goto CLEANUP; goto CLEANUP;
} }
@ -358,14 +207,13 @@ int orte_gpr_replica_register_callback(orte_gpr_replica_trigger_t *trig,
* store the values in the notify_data structure corresponding to this * store the values in the notify_data structure corresponding to this
* subscription id, combining data where the id's match * subscription id, combining data where the id's match
*/ */
if (ORTE_SUCCESS != (rc = orte_gpr_replica_store_value_in_msg(reqs[i]->idtag, if (ORTE_SUCCESS != (rc = orte_gpr_replica_store_value_in_msg(reqs[i],
cb->message, cnt, values))) { cb->message, cnt, values))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
goto CLEANUP; goto CLEANUP;
} }
} }
} /* for i */ } /* for i */
} /* if else */
CLEANUP: CLEANUP:
/* release the values here - the value objects have been "retained" in /* release the values here - the value objects have been "retained" in
@ -383,7 +231,136 @@ CLEANUP:
} }
/*
 * orte_gpr_replica_register_trigger_callback
 *
 * Assemble the notification for a trigger and attach it to a callback
 * addressed to the trigger master's requestor:
 *   1. obtain a callback of type ORTE_GPR_TRIGGER_MSG for
 *      (trig->master)->requestor via orte_gpr_replica_define_callback
 *   2. copy the trigger name (if any) into the message target and set the
 *      message id to the master's idtag
 *   3. if the trigger action includes ORTE_GPR_TRIG_INCLUDE_TRIG_CNTRS,
 *      store one value per trigger counter in the message
 *   4. for each subscription attached to the trigger: store the data of
 *      named subscriptions in the trigger message; register a separate
 *      callback for unnamed ones
 *
 * Returns ORTE_SUCCESS, ORTE_ERR_OUT_OF_RESOURCE on allocation failure, or
 * the error code of the first helper call that fails.
 *
 * NOTE(review): trig and trig->master are dereferenced without a NULL
 * check - presumably the caller guarantees both; confirm.
 */
int orte_gpr_replica_define_callback(orte_gpr_replica_callbacks_t **cbptr, int orte_gpr_replica_register_trigger_callback(orte_gpr_replica_trigger_t *trig)
{
orte_gpr_replica_callbacks_t *cb;
orte_gpr_replica_counter_t **cntr;
orte_gpr_replica_subscription_t **subs;
orte_gpr_value_t **values, *value;
size_t i, j, k, cnt;
int rc;
/* set the callback's message
* to point at the correct trigger id for that requestor
* so the message goes to the correct place, and go ahead
* and store the data in the message
*/
/* define the callback */
if (ORTE_SUCCESS != (rc = orte_gpr_replica_define_callback(ORTE_GPR_TRIGGER_MSG,
&cb, (trig->master)->requestor))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* transfer the trigger name, if available */
/* NOTE(review): the strdup result is not checked for NULL */
if (NULL != trig->name) {
(cb->message)->target = strdup(trig->name);
}
/* set the callback id to point to the trigger callback function */
(cb->message)->id = (trig->master)->idtag;
/* if the trigger counters are to be included, do so */
if (ORTE_GPR_TRIG_INCLUDE_TRIG_CNTRS & trig->action) {
cntr = (orte_gpr_replica_counter_t**)((trig->counters)->addr);
/* i walks the array slots, j counts the non-NULL counters seen so the
 * scan can stop once all num_counters entries have been visited
 */
for (i=0, j=0; j < trig->num_counters &&
i < (trig->counters)->size; i++) {
if (NULL != cntr[i]) {
j++;
/* build a temporary value holding a single keyval for this counter */
value = OBJ_NEW(orte_gpr_value_t);
if (NULL == value) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
value->segment = strdup(cntr[i]->seg->name);
value->cnt = 1;
value->keyvals = (orte_gpr_keyval_t**)malloc(sizeof(orte_gpr_keyval_t*));
/* NOTE(review): the two out-of-resource returns below leave without
 * OBJ_RELEASE(value), unlike the later error paths - looks like a leak
 */
if (NULL == value->keyvals) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
value->keyvals[0] = OBJ_NEW(orte_gpr_keyval_t);
if (NULL == value->keyvals[0]) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
/* translate the counter's itag back into its key string */
if (ORTE_SUCCESS != (rc = orte_gpr_replica_dict_reverse_lookup(
&(value->keyvals[0]->key), cntr[i]->seg,
cntr[i]->iptr->itag))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(value);
return rc;
}
/* carry over the counter's data type and payload */
value->keyvals[0]->type = cntr[i]->iptr->type;
if (ORTE_SUCCESS != (rc = orte_gpr_base_xfer_payload(
&(value->keyvals[0]->value),
&(cntr[i]->iptr->value), cntr[i]->iptr->type))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(value);
return rc;
}
/*
* store the data in the message
*/
/* counters are stored with a NULL subscription argument */
/* NOTE(review): value is not released on this error return */
if (ORTE_SUCCESS != (rc = orte_gpr_replica_store_value_in_trigger_msg(NULL,
cb->message, 1, &value))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* release the storage */
OBJ_RELEASE(value);
}
}
}
/* cycle through all the trigger's subscriptions and place
* that data on the message
*/
subs = (orte_gpr_replica_subscription_t**)(trig->subscriptions)->addr;
/* same sparse-array scan as above: i indexes slots, j counts live entries */
for (i=0, j=0; j < trig->num_subscriptions &&
i < (trig->subscriptions)->size; i++) {
if (NULL != subs[i]) {
j++;
if (NULL != subs[i]->name) {
/* if it's a named subscription, we will deliver it via the
* trigger callback function. The data to be returned will
* be the same for all requestors.
*/
if (ORTE_SUCCESS != (rc = orte_gpr_replica_get_callback_data(&values, &cnt, subs[i]))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/*
* store the data in the message
*/
/* NOTE(review): values and its cnt entries are not released on this
 * error return
 */
if (ORTE_SUCCESS != (rc = orte_gpr_replica_store_value_in_trigger_msg(subs[i],
cb->message, cnt, values))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* release the storage */
/* each value object is released first, then the pointer array itself */
for (k=0; k < cnt; k++) OBJ_RELEASE(values[k]);
if (NULL != values) free(values);
} else {
/* in the case of a non-named subscription, we know that someone
* has attached a subscription to this trigger, and that the
* requestor needs the data to be returned directly to them. This
* occurs in the case of orterun, which attaches subscriptions to
* the standard triggers so it can monitor the progress of a job
* it has launched. To facilitate this, we register a separate
* callback for this subscription
*/
/* passing NULL as the value makes register_callback fetch the data
 * for the subscription itself
 */
if (ORTE_SUCCESS != (rc = orte_gpr_replica_register_callback(subs[i], NULL))) {
ORTE_ERROR_LOG(rc);
return rc;
}
}
}
}
return ORTE_SUCCESS;
}
int orte_gpr_replica_define_callback(orte_gpr_notify_msg_type_t msg_type,
orte_gpr_replica_callbacks_t **cbptr,
orte_process_name_t *recipient) orte_process_name_t *recipient)
{ {
orte_gpr_replica_callbacks_t *cb; orte_gpr_replica_callbacks_t *cb;
@ -393,12 +370,19 @@ int orte_gpr_replica_define_callback(orte_gpr_replica_callbacks_t **cbptr,
for (cb = (orte_gpr_replica_callbacks_t*)opal_list_get_first(&(orte_gpr_replica.callbacks)); for (cb = (orte_gpr_replica_callbacks_t*)opal_list_get_first(&(orte_gpr_replica.callbacks));
cb != (orte_gpr_replica_callbacks_t*)opal_list_get_end(&(orte_gpr_replica.callbacks)); cb != (orte_gpr_replica_callbacks_t*)opal_list_get_end(&(orte_gpr_replica.callbacks));
cb = (orte_gpr_replica_callbacks_t*)opal_list_get_next(cb)) { cb = (orte_gpr_replica_callbacks_t*)opal_list_get_next(cb)) {
/* must check to see if both the recipient is the same AND that the
if ((NULL == recipient && NULL == cb->requestor) || * message type being sent is identical (i.e., that messages going back
((NULL != recipient && NULL != cb->requestor) && * to trigger callbacks do NOT get mixed with messages going back to
* subscription callbacks). This is critical as the deliver_notify_msg
* functions handle these message types in different ways
*/
if (((NULL == recipient && NULL == cb->requestor) &&
(msg_type == cb->message->msg_type)) ||
(((NULL != recipient && NULL != cb->requestor) &&
(0 == orte_ns.compare(ORTE_NS_CMP_ALL, (0 == orte_ns.compare(ORTE_NS_CMP_ALL,
recipient, recipient,
cb->requestor)))) { cb->requestor))) &&
(msg_type == cb->message->msg_type))) {
/* okay, a callback has been registered to send data to this /* okay, a callback has been registered to send data to this
* recipient - return this location * recipient - return this location
*/ */
@ -424,6 +408,7 @@ int orte_gpr_replica_define_callback(orte_gpr_replica_callbacks_t **cbptr,
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE; return ORTE_ERR_OUT_OF_RESOURCE;
} }
cb->message->msg_type = msg_type;
if (NULL == recipient) { if (NULL == recipient) {
cb->requestor = NULL; cb->requestor = NULL;
@ -441,7 +426,7 @@ int orte_gpr_replica_define_callback(orte_gpr_replica_callbacks_t **cbptr,
} }
int orte_gpr_replica_store_value_in_msg(orte_gpr_subscription_id_t id, int orte_gpr_replica_store_value_in_msg(orte_gpr_replica_requestor_t *req,
orte_gpr_notify_message_t *msg, orte_gpr_notify_message_t *msg,
size_t cnt, size_t cnt,
orte_gpr_value_t **values) orte_gpr_value_t **values)
@ -459,7 +444,7 @@ int orte_gpr_replica_store_value_in_msg(orte_gpr_subscription_id_t id,
i < (msg->data)->size; i++) { i < (msg->data)->size; i++) {
if (NULL != data[i]) { if (NULL != data[i]) {
k++; k++;
if (data[i]->id == id) { /* going to the same place */ if (data[i]->id == req->idtag) { /* going to the same place */
for (j=0; j < cnt; j++) { for (j=0; j < cnt; j++) {
if (0 > orte_pointer_array_add(&index, data[i]->values, values[j])) { if (0 > orte_pointer_array_add(&index, data[i]->values, values[j])) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
@ -489,7 +474,7 @@ int orte_gpr_replica_store_value_in_msg(orte_gpr_subscription_id_t id,
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE; return ORTE_ERR_OUT_OF_RESOURCE;
} }
dptr->id = id; dptr->id = req->idtag;
if (0 > orte_pointer_array_add(&index, msg->data, dptr)) { if (0 > orte_pointer_array_add(&index, msg->data, dptr)) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE; return ORTE_ERR_OUT_OF_RESOURCE;
@ -514,3 +499,159 @@ int orte_gpr_replica_store_value_in_msg(orte_gpr_subscription_id_t id,
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }
/**
 * Store a set of values in a trigger notification message.
 *
 * Values are grouped inside the message by the name of the subscription
 * they belong to (the "target" of each notify_data entry). If the message
 * already holds an entry with the same target, the values are appended to
 * it; otherwise a new entry is created and added to the message.
 *
 * @param sub     Subscription the values belong to. May be NULL, in which
 *                case the values are stored under an entry with no target.
 *                A subscription without a name is treated the same way.
 * @param msg     Message that receives the values.
 * @param cnt     Number of entries in @p values.
 * @param values  Array of value objects to store. Each stored value is
 *                retained, since only the pointer is stored and several
 *                entries may end up sharing the same object.
 *
 * @return ORTE_SUCCESS, or ORTE_ERR_OUT_OF_RESOURCE if memory could not
 *         be obtained.
 */
static int orte_gpr_replica_store_value_in_trigger_msg(orte_gpr_replica_subscription_t *sub,
                                                       orte_gpr_notify_message_t *msg,
                                                       size_t cnt,
                                                       orte_gpr_value_t **values)
{
    size_t i, j, k, index;
    orte_gpr_notify_data_t **data, *dptr;
    const char *name;

    /* resolve the target name once, so that neither a NULL subscription
     * nor an un-named subscription is ever dereferenced or handed to
     * strcmp/strdup below
     */
    name = (NULL != sub) ? sub->name : NULL;

    /* check to see if this data is going to the same place as
     * any prior data on the message. if so, then we add the values
     * to that existing data structure. if not, then we create a new
     * data structure and store the data there
     */
    data = (orte_gpr_notify_data_t**)(msg->data)->addr;
    for (i=0, k=0; k < msg->cnt &&
                   i < (msg->data)->size; i++) {
        if (NULL == data[i]) {
            continue;
        }
        k++;
        if ((NULL == data[i]->target && NULL == name) ||
            (NULL != data[i]->target && NULL != name &&
             0 == strcmp(data[i]->target, name))) { /* going to the same place */
            for (j=0; j < cnt; j++) {
                if (0 > orte_pointer_array_add(&index, data[i]->values, values[j])) {
                    ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
                    /* account for the values that did get stored */
                    data[i]->cnt += j;
                    return ORTE_ERR_OUT_OF_RESOURCE;
                }
                /* must "retain" the value object to ensure that it is
                 * there for this datagram. Since we are only storing
                 * pointers to the object (and not actually copying it),
                 * datagrams may wind up sharing the object. Hence, when
                 * a datagram is released, it will release the object. Without
                 * the retain, the next datagram that shares that object
                 * will see trash
                 */
                OBJ_RETAIN(values[j]);
            }
            data[i]->cnt += cnt;
            return ORTE_SUCCESS;
        }
    }

    /* no prior matching data found, so add another data location to
     * the message and store the values there
     */
    dptr = OBJ_NEW(orte_gpr_notify_data_t);
    if (NULL == dptr) {
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        return ORTE_ERR_OUT_OF_RESOURCE;
    }
    if (NULL != name) {
        dptr->target = strdup(name);
        if (NULL == dptr->target) {
            ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
            OBJ_RELEASE(dptr);
            return ORTE_ERR_OUT_OF_RESOURCE;
        }
    }
    if (0 > orte_pointer_array_add(&index, msg->data, dptr)) {
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        /* the message never took the new entry, so it is still ours */
        OBJ_RELEASE(dptr);
        return ORTE_ERR_OUT_OF_RESOURCE;
    }
    (msg->cnt)++;

    for (j=0; j < cnt; j++) {
        if (0 > orte_pointer_array_add(&index, dptr->values, values[j])) {
            ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
            /* the entry now belongs to the message - record how many
             * values it actually holds
             */
            dptr->cnt = j;
            return ORTE_ERR_OUT_OF_RESOURCE;
        }
        /* retain for the same reason as above - the entry only stores
         * a pointer to an object that other datagrams may share
         */
        OBJ_RETAIN(values[j]);
    }
    dptr->cnt = cnt;

    return ORTE_SUCCESS;
}
/**
 * Collect the registry data described by a subscription.
 *
 * A subscription can specify several data sources. Each one is queried
 * with orte_gpr_replica_get_fn and the results are concatenated into a
 * single array of value pointers.
 *
 * @param ret_values  Receives the aggregated array of value pointers, or
 *                    NULL if no data was found or an error occurred. On
 *                    success the caller takes over both the array and the
 *                    value objects it points to.
 * @param cnt         Receives the number of entries in @p ret_values
 *                    (0 on error).
 * @param sub         Subscription whose data descriptions are queried.
 *
 * @return ORTE_SUCCESS, the error code returned by orte_gpr_replica_get_fn,
 *         or ORTE_ERR_OUT_OF_RESOURCE if the result array could not be
 *         grown. On any error, everything collected so far is released.
 */
static int orte_gpr_replica_get_callback_data(orte_gpr_value_t ***ret_values, size_t *cnt,
                                              orte_gpr_replica_subscription_t *sub)
{
    orte_gpr_value_t **vals, **values, **grown;
    orte_gpr_replica_ivalue_t **ivals;
    size_t i, j, k, num_tokens, num_keys, interim, count;
    int rc;

    /* setup default error returns */
    *ret_values = NULL;
    *cnt = 0;

    /* get the data off the registry. since a
     * subscription can have multiple data sources specified, we
     * have to loop through those sources, constructing an aggregated
     * array of data values that we can work with in composing the
     * final message
     */
    ivals = (orte_gpr_replica_ivalue_t**)(sub->values)->addr;
    count = 0;
    values = NULL;
    for (i=0, j=0; j < sub->num_values &&
                   i < (sub->values)->size; i++) {
        if (NULL == ivals[i]) {
            continue;
        }
        j++;
        num_tokens = orte_value_array_get_size(&(ivals[i]->tokentags));
        num_keys = orte_value_array_get_size(&(ivals[i]->keytags));

        /* get the data for this description off the registry */
        if (ORTE_SUCCESS != (rc = orte_gpr_replica_get_fn(ivals[i]->addr_mode,
                ivals[i]->seg,
                ORTE_VALUE_ARRAY_GET_BASE(&(ivals[i]->tokentags), orte_gpr_replica_itag_t),
                num_tokens,
                ORTE_VALUE_ARRAY_GET_BASE(&(ivals[i]->keytags), orte_gpr_replica_itag_t),
                num_keys,
                &interim, &vals))) {
            ORTE_ERROR_LOG(rc);
            goto CLEANUP;
        }

        /* if we don't get any data back, just continue - don't
         * try to add it to the values since that would cause a
         * zero-byte allocation
         */
        if (0 == interim) {
            continue;
        }

        /* grow the aggregated array. realloc of a NULL pointer acts as
         * malloc, so the first pass needs no special case. the result
         * goes into a temporary so the existing array is not lost if
         * the allocation fails
         */
        grown = (orte_gpr_value_t**)realloc(values,
                        (count+interim)*sizeof(orte_gpr_value_t*));
        if (NULL == grown) {
            ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
            /* the new results never made it into the aggregated
             * array, so release them here
             */
            for (k=0; k < interim; k++) {
                if (NULL != vals[k]) {
                    OBJ_RELEASE(vals[k]);
                }
            }
            free(vals);
            rc = ORTE_ERR_OUT_OF_RESOURCE;
            goto CLEANUP;
        }
        values = grown;

        /* add data to end of array */
        for (k=0; k < interim; k++) {
            values[k+count] = vals[k];
        }

        /* release the array of pointers - the pointers themselves
         * will remain "alive" in the values array to be released
         * later
         */
        free(vals);

        /* update the count */
        count += interim;
    }

    *ret_values = values;
    *cnt = count;
    return ORTE_SUCCESS;

CLEANUP:
    /* error exit - give back everything that was collected so far */
    for (k=0; k < count; k++) {
        if (NULL != values[k]) {
            OBJ_RELEASE(values[k]);
        }
    }
    free(values);
    return rc;
}

Просмотреть файл

@ -86,11 +86,19 @@ orte_gpr_replica_register_subscription(orte_gpr_replica_subscription_t **subptr,
* Un-named subscriptions are, therefore, assumed to be specialty * Un-named subscriptions are, therefore, assumed to be specialty
* subscriptions that do not merit such consideration. * subscriptions that do not merit such consideration.
*/ */
/* see if another subscription is available on the system */
if (ORTE_GPR_SUBSCRIPTION_ID_MAX-1 < orte_gpr_replica.num_subs) { /* none left! */
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
sub = OBJ_NEW(orte_gpr_replica_subscription_t); sub = OBJ_NEW(orte_gpr_replica_subscription_t);
if (NULL == sub) { if (NULL == sub) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE; return ORTE_ERR_OUT_OF_RESOURCE;
} }
sub->idtag = orte_gpr_replica.num_subs;
if (NULL != subscription->name) { if (NULL != subscription->name) {
sub->name = strdup(subscription->name); sub->name = strdup(subscription->name);
@ -107,6 +115,7 @@ orte_gpr_replica_register_subscription(orte_gpr_replica_subscription_t **subptr,
ival = OBJ_NEW(orte_gpr_replica_ivalue_t); ival = OBJ_NEW(orte_gpr_replica_ivalue_t);
if (NULL == ival) { if (NULL == ival) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
OBJ_RELEASE(sub);
return ORTE_ERR_OUT_OF_RESOURCE; return ORTE_ERR_OUT_OF_RESOURCE;
} }
@ -114,6 +123,8 @@ orte_gpr_replica_register_subscription(orte_gpr_replica_subscription_t **subptr,
if (ORTE_SUCCESS != (rc = orte_gpr_replica_find_seg(&(ival->seg), true, if (ORTE_SUCCESS != (rc = orte_gpr_replica_find_seg(&(ival->seg), true,
subscription->values[i]->segment))) { subscription->values[i]->segment))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
OBJ_RELEASE(sub);
OBJ_RELEASE(ival);
return rc; return rc;
} }
tok_mode = 0x004f & subscription->values[i]->addr_mode; tok_mode = 0x004f & subscription->values[i]->addr_mode;
@ -132,11 +143,15 @@ orte_gpr_replica_register_subscription(orte_gpr_replica_subscription_t **subptr,
if (ORTE_SUCCESS != (rc = orte_gpr_replica_get_itag_list(&tokentags, ival->seg, if (ORTE_SUCCESS != (rc = orte_gpr_replica_get_itag_list(&tokentags, ival->seg,
subscription->values[i]->tokens, &num_tokens))) { subscription->values[i]->tokens, &num_tokens))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
OBJ_RELEASE(sub);
OBJ_RELEASE(ival);
return rc; return rc;
} }
if (ORTE_SUCCESS != (rc = orte_value_array_set_size(&(ival->tokentags), (size_t)num_tokens))) { if (ORTE_SUCCESS != (rc = orte_value_array_set_size(&(ival->tokentags), (size_t)num_tokens))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
OBJ_RELEASE(sub);
OBJ_RELEASE(ival);
return rc; return rc;
} }
for (j=0; j < num_tokens; j++) { for (j=0; j < num_tokens; j++) {
@ -152,6 +167,8 @@ orte_gpr_replica_register_subscription(orte_gpr_replica_subscription_t **subptr,
num_keys = subscription->values[i]->cnt; num_keys = subscription->values[i]->cnt;
if (ORTE_SUCCESS != (rc = orte_value_array_set_size(&(ival->keytags), num_keys))) { if (ORTE_SUCCESS != (rc = orte_value_array_set_size(&(ival->keytags), num_keys))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
OBJ_RELEASE(sub);
OBJ_RELEASE(ival);
return rc; return rc;
} }
for (j=0; j < num_keys; j++) { for (j=0; j < num_keys; j++) {
@ -159,6 +176,8 @@ orte_gpr_replica_register_subscription(orte_gpr_replica_subscription_t **subptr,
ival->seg, ival->seg,
subscription->values[i]->keyvals[j]->key))) { subscription->values[i]->keyvals[j]->key))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
OBJ_RELEASE(sub);
OBJ_RELEASE(ival);
return rc; return rc;
} }
ORTE_VALUE_ARRAY_SET_ITEM(&(ival->keytags), orte_gpr_replica_itag_t, ORTE_VALUE_ARRAY_SET_ITEM(&(ival->keytags), orte_gpr_replica_itag_t,
@ -168,6 +187,8 @@ orte_gpr_replica_register_subscription(orte_gpr_replica_subscription_t **subptr,
/* add the object to the subscription's value pointer array */ /* add the object to the subscription's value pointer array */
if (0 > (rc = orte_pointer_array_add(&(ival->index), sub->values, ival))) { if (0 > (rc = orte_pointer_array_add(&(ival->index), sub->values, ival))) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
OBJ_RELEASE(sub);
OBJ_RELEASE(ival);
return ORTE_ERR_OUT_OF_RESOURCE; return ORTE_ERR_OUT_OF_RESOURCE;
} }
(sub->num_values)++; (sub->num_values)++;
@ -175,6 +196,7 @@ orte_gpr_replica_register_subscription(orte_gpr_replica_subscription_t **subptr,
/* add the object to the replica's subscriptions pointer array */ /* add the object to the replica's subscriptions pointer array */
if (0 > (rc = orte_pointer_array_add(&(sub->index), orte_gpr_replica.subscriptions, sub))) { if (0 > (rc = orte_pointer_array_add(&(sub->index), orte_gpr_replica.subscriptions, sub))) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
OBJ_RELEASE(sub);
return ORTE_ERR_OUT_OF_RESOURCE; return ORTE_ERR_OUT_OF_RESOURCE;
} }
(orte_gpr_replica.num_subs)++; (orte_gpr_replica.num_subs)++;
@ -273,11 +295,18 @@ orte_gpr_replica_register_trigger(orte_gpr_replica_trigger_t **trigptr,
* triggers that do not merit such consideration. * triggers that do not merit such consideration.
*/ */
/* see if another trigger is available */
if (ORTE_GPR_TRIGGER_ID_MAX-1 < orte_gpr_replica.num_trigs) { /* none left! */
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
trig = OBJ_NEW(orte_gpr_replica_trigger_t); trig = OBJ_NEW(orte_gpr_replica_trigger_t);
if (NULL == trig) { if (NULL == trig) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE; return ORTE_ERR_OUT_OF_RESOURCE;
} }
trig->idtag = orte_gpr_replica.num_trigs;
/* if a name for this trigger has been provided, copy it over */ /* if a name for this trigger has been provided, copy it over */
if (NULL != trigger->name) { if (NULL != trigger->name) {
@ -331,7 +360,8 @@ orte_gpr_replica_register_trigger(orte_gpr_replica_trigger_t **trigptr,
goto CLEANUP; goto CLEANUP;
} }
if (0 == orte_gpr_replica_globals.num_srch_cptr) { /* no existing container found - create one using all the tokens */ if (0 == orte_gpr_replica_globals.num_srch_cptr) {
/* no existing container found - create one using all the tokens */
if (ORTE_SUCCESS != (rc = orte_gpr_replica_create_container(&cptr2, seg, if (ORTE_SUCCESS != (rc = orte_gpr_replica_create_container(&cptr2, seg,
num_tokens, tokentags))) { num_tokens, tokentags))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
@ -392,7 +422,8 @@ orte_gpr_replica_register_trigger(orte_gpr_replica_trigger_t **trigptr,
0 < orte_gpr_replica_globals.num_srch_ival) { 0 < orte_gpr_replica_globals.num_srch_ival) {
/* this key already exists - make sure it's unique /* this key already exists - make sure it's unique
*/ */
if (1 < orte_gpr_replica_globals.num_srch_ival || found) { /* not unique - error out */ if (1 < orte_gpr_replica_globals.num_srch_ival || found) {
/* not unique - error out */
ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
rc = ORTE_ERR_BAD_PARAM; rc = ORTE_ERR_BAD_PARAM;
goto CLEANUP; goto CLEANUP;
@ -469,12 +500,23 @@ ADDREQ:
* the data * the data
*/ */
if (ORTE_GPR_TRIG_ROUTE_DATA_THRU_ME & trig->action) { if (ORTE_GPR_TRIG_ROUTE_DATA_THRU_ME & trig->action) {
if (NULL != trig->master) { if (NULL == trig->master) {
/* someone already requested this responsibility. /* someone already requested this responsibility.
* this is an error - report it * if I'm a singleton, this is NOT an error - the
* initial "launch" has recorded the stage gate
* triggers using the [-1,-1,-1] name, so we need to
* overwrite that with my name so I get the notifications.
*/ */
ORTE_ERROR_LOG(ORTE_ERR_NOT_AVAILABLE); #if 0
if (orte_process_info.singleton || orte_process_info.seed) {
opal_output(0, "Trigger master being redefined");
trig->master = req;
} else { } else {
/* if i'm not a singleton, then this is an error - report it */
ORTE_ERROR_LOG(ORTE_ERR_NOT_AVAILABLE);
}
} else {
#endif
trig->master = req; trig->master = req;
} }
} }
@ -911,6 +953,37 @@ int orte_gpr_replica_check_trig(orte_gpr_replica_trigger_t *trig)
return ORTE_SUCCESS; /* neither cmp nor at level set */ return ORTE_SUCCESS; /* neither cmp nor at level set */
FIRED: FIRED:
/* if this trigger wants everything routed through a "master", then we register
* this as a trigger_callback.
*/
if (NULL != trig->master) {
if (ORTE_SUCCESS != (rc = orte_gpr_replica_register_trigger_callback(trig))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* for each subscription associated with this trigger, check to see if
* the subscription needs any special treatment
*/
subs = (orte_gpr_replica_subscription_t**)(trig->subscriptions)->addr;
for (i=0, j=0; j < trig->num_subscriptions &&
i < (trig->subscriptions)->size; i++) {
if (NULL != subs[i]) {
j++;
/* if ORTE_GPR_NOTIFY_STARTS_AFTER_TRIG set, set the subscription
* "active" to indicate that trigger fired
*/
if (ORTE_GPR_NOTIFY_STARTS_AFTER_TRIG & subs[i]->action) {
subs[i]->active = true;
}
/* if ORTE_GPR_NOTIFY_DELETE_AFTER_TRIG set, then set the flag
* so it can be cleaned up later
*/
if (ORTE_GPR_NOTIFY_DELETE_AFTER_TRIG & subs[i]->action) {
subs[i]->cleanup = true;
}
}
}
} else {
/* for each subscription associated with this trigger, we need to /* for each subscription associated with this trigger, we need to
* register a callback to the requestor that returns the specified * register a callback to the requestor that returns the specified
* data * data
@ -920,7 +993,7 @@ FIRED:
i < (trig->subscriptions)->size; i++) { i < (trig->subscriptions)->size; i++) {
if (NULL != subs[i]) { if (NULL != subs[i]) {
j++; j++;
if (ORTE_SUCCESS != (rc = orte_gpr_replica_register_callback(trig, subs[i], NULL))) { if (ORTE_SUCCESS != (rc = orte_gpr_replica_register_callback(subs[i], NULL))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
return rc; return rc;
} }
@ -938,6 +1011,8 @@ FIRED:
} }
} }
} }
}
/* set the processing flag so we don't go into infinite loop if /* set the processing flag so we don't go into infinite loop if
* any callback functions modify the registry * any callback functions modify the registry
@ -1057,7 +1132,7 @@ int orte_gpr_replica_check_subscription(orte_gpr_replica_subscription_t *sub)
goto CLEANUP; goto CLEANUP;
} }
if (ORTE_SUCCESS != (rc = if (ORTE_SUCCESS != (rc =
orte_gpr_replica_register_callback(NULL, sub, value))) { orte_gpr_replica_register_callback(sub, value))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
goto CLEANUP; goto CLEANUP;
} }

Просмотреть файл

@ -77,6 +77,7 @@ typedef uint8_t orte_gpr_replica_action_t;
typedef struct { typedef struct {
opal_object_t super; /**< Allows this to be an object */ opal_object_t super; /**< Allows this to be an object */
orte_gpr_subscription_id_t id; /**< id of this subscription */ orte_gpr_subscription_id_t id; /**< id of this subscription */
size_t index; /**< location of this subscription in array */
char *name; char *name;
orte_gpr_notify_cb_fn_t callback; /**< Function to be called for notificaiton */ orte_gpr_notify_cb_fn_t callback; /**< Function to be called for notificaiton */
void *user_tag; /**< User-provided tag for callback function */ void *user_tag; /**< User-provided tag for callback function */
@ -92,6 +93,7 @@ OBJ_CLASS_DECLARATION(orte_gpr_replica_local_subscriber_t);
typedef struct { typedef struct {
opal_object_t super; /**< Allows this to be an object */ opal_object_t super; /**< Allows this to be an object */
orte_gpr_trigger_id_t id; /**< id of this trigger */ orte_gpr_trigger_id_t id; /**< id of this trigger */
size_t index; /**< location of this trigger in array */
char *name; char *name;
orte_gpr_trigger_cb_fn_t callback; /**< Function to be called for notification */ orte_gpr_trigger_cb_fn_t callback; /**< Function to be called for notification */
void *user_tag; /**< User-provided tag for callback function */ void *user_tag; /**< User-provided tag for callback function */
@ -148,9 +150,9 @@ struct orte_gpr_replica_t {
orte_pointer_array_t *segments; /**< Managed array of pointers to segment objects */ orte_pointer_array_t *segments; /**< Managed array of pointers to segment objects */
size_t num_segs; size_t num_segs;
orte_pointer_array_t *triggers; /**< Managed array of pointers to triggers */ orte_pointer_array_t *triggers; /**< Managed array of pointers to triggers */
size_t num_trigs; orte_gpr_trigger_id_t num_trigs;
orte_pointer_array_t *subscriptions; /**< Managed array of pointers to subscriptions */ orte_pointer_array_t *subscriptions; /**< Managed array of pointers to subscriptions */
size_t num_subs; orte_gpr_subscription_id_t num_subs;
bool processing_callbacks; bool processing_callbacks;
opal_list_t callbacks; /**< List of callbacks to be processed */ opal_list_t callbacks; /**< List of callbacks to be processed */
}; };
@ -260,17 +262,18 @@ typedef struct {
orte_process_name_t *requestor; orte_process_name_t *requestor;
/* idtag associated with this subscription */ /* idtag associated with this subscription */
orte_gpr_subscription_id_t idtag; orte_gpr_subscription_id_t idtag;
/* for a local subscription, where this block of data goes */
orte_gpr_notify_cb_fn_t callback; /**< Function to be called for notification */
void *user_tag; /**< User-provided tag for callback function */
} orte_gpr_replica_requestor_t; } orte_gpr_replica_requestor_t;
OBJ_CLASS_DECLARATION(orte_gpr_replica_requestor_t); OBJ_CLASS_DECLARATION(orte_gpr_replica_requestor_t);
typedef struct { typedef struct {
opal_object_t super; /**< Makes this an object */ opal_object_t super; /**< Makes this an object */
/* index of this entry in subscription array - corresponds to local idtag */ /* index of this entry in subscription array */
size_t index; size_t index;
/* idtag for the subscription - may be different than index since
* the data type can be different than size_t
*/
orte_gpr_subscription_id_t idtag;
/* name of this subscription, if provided */ /* name of this subscription, if provided */
char *name; char *name;
/* boolean indicating if this subscription is active or not */ /* boolean indicating if this subscription is active or not */
@ -323,8 +326,10 @@ struct orte_gpr_replica_trigger_t {
opal_object_t super; /**< Make this an object */ opal_object_t super; /**< Make this an object */
/* name of this trigger, if provided */ /* name of this trigger, if provided */
char *name; char *name;
/* index of this trigger in the triggers array - corresponds to local idtag */ /* index of this trigger in the triggers array */
size_t index; size_t index;
/* trigger id on the local system */
orte_gpr_trigger_id_t idtag;
/* array of requestors that have "attached" themselves to this trigger */ /* array of requestors that have "attached" themselves to this trigger */
size_t num_attached; size_t num_attached;
orte_pointer_array_t *attached; orte_pointer_array_t *attached;

Просмотреть файл

@ -37,6 +37,8 @@
static void orte_gpr_replica_local_subscriber_constructor(orte_gpr_replica_local_subscriber_t *ptr) static void orte_gpr_replica_local_subscriber_constructor(orte_gpr_replica_local_subscriber_t *ptr)
{ {
ptr->name = NULL; ptr->name = NULL;
ptr->callback = NULL;
ptr->user_tag = NULL;
} }
static void orte_gpr_replica_local_subscriber_destructor(orte_gpr_replica_local_subscriber_t *ptr) static void orte_gpr_replica_local_subscriber_destructor(orte_gpr_replica_local_subscriber_t *ptr)
@ -55,6 +57,8 @@ OBJ_CLASS_INSTANCE(
static void orte_gpr_replica_local_trigger_constructor(orte_gpr_replica_local_trigger_t *ptr) static void orte_gpr_replica_local_trigger_constructor(orte_gpr_replica_local_trigger_t *ptr)
{ {
ptr->name = NULL; ptr->name = NULL;
ptr->callback = NULL;
ptr->user_tag = NULL;
} }
static void orte_gpr_replica_local_trigger_destructor(orte_gpr_replica_local_trigger_t *ptr) static void orte_gpr_replica_local_trigger_destructor(orte_gpr_replica_local_trigger_t *ptr)
@ -303,6 +307,7 @@ OBJ_CLASS_INSTANCE(
static void orte_gpr_replica_subscription_construct(orte_gpr_replica_subscription_t* sub) static void orte_gpr_replica_subscription_construct(orte_gpr_replica_subscription_t* sub)
{ {
sub->index = 0; sub->index = 0;
sub->idtag = ORTE_GPR_SUBSCRIPTION_ID_MAX;
sub->name = NULL; sub->name = NULL;
sub->active = false; sub->active = false;
sub->processing = false; sub->processing = false;
@ -367,6 +372,7 @@ OBJ_CLASS_INSTANCE(
static void orte_gpr_replica_trigger_requestor_construct(orte_gpr_replica_trigger_requestor_t* ptr) static void orte_gpr_replica_trigger_requestor_construct(orte_gpr_replica_trigger_requestor_t* ptr)
{ {
ptr->index = 0; ptr->index = 0;
ptr->idtag = ORTE_GPR_TRIGGER_ID_MAX;
ptr->requestor = NULL; ptr->requestor = NULL;
ptr->idtag = 0; ptr->idtag = 0;
} }
@ -391,6 +397,7 @@ static void orte_gpr_replica_trigger_construct(orte_gpr_replica_trigger_t* trig)
{ {
trig->name = NULL; trig->name = NULL;
trig->index = 0; trig->index = 0;
trig->idtag = ORTE_GPR_TRIGGER_ID_MAX;
trig->num_attached = 0; trig->num_attached = 0;
orte_pointer_array_init(&(trig->attached), orte_gpr_array_block_size, orte_pointer_array_init(&(trig->attached), orte_gpr_array_block_size,

Просмотреть файл

@ -82,6 +82,7 @@ static orte_gpr_base_module_t orte_gpr_replica_module = {
/* GENERAL OPERATIONS */ /* GENERAL OPERATIONS */
orte_gpr_replica_preallocate_segment, orte_gpr_replica_preallocate_segment,
orte_gpr_base_xfer_payload, orte_gpr_base_xfer_payload,
orte_gpr_replica_deliver_notify_msg,
/* ARITHMETIC OPERATIONS */ /* ARITHMETIC OPERATIONS */
orte_gpr_replica_increment_value, orte_gpr_replica_increment_value,
orte_gpr_replica_decrement_value, orte_gpr_replica_decrement_value,
@ -90,6 +91,7 @@ static orte_gpr_base_module_t orte_gpr_replica_module = {
orte_gpr_base_subscribe_1, orte_gpr_base_subscribe_1,
orte_gpr_base_subscribe_N, orte_gpr_base_subscribe_N,
orte_gpr_base_define_trigger, orte_gpr_base_define_trigger,
orte_gpr_base_define_trigger_level,
orte_gpr_replica_unsubscribe, orte_gpr_replica_unsubscribe,
orte_gpr_replica_cancel_trigger, orte_gpr_replica_cancel_trigger,
/* COMPOUND COMMANDS */ /* COMPOUND COMMANDS */
@ -101,6 +103,8 @@ static orte_gpr_base_module_t orte_gpr_replica_module = {
orte_gpr_replica_dump_segments, orte_gpr_replica_dump_segments,
orte_gpr_replica_dump_triggers, orte_gpr_replica_dump_triggers,
orte_gpr_replica_dump_subscriptions, orte_gpr_replica_dump_subscriptions,
orte_gpr_replica_dump_a_trigger,
orte_gpr_replica_dump_a_subscription,
orte_gpr_replica_dump_local_triggers, orte_gpr_replica_dump_local_triggers,
orte_gpr_replica_dump_local_subscriptions, orte_gpr_replica_dump_local_subscriptions,
orte_gpr_replica_dump_callbacks, orte_gpr_replica_dump_callbacks,

Просмотреть файл

@ -235,6 +235,7 @@ PROCESS:
nptr->cellid = 0; nptr->cellid = 0;
nptr->jobid = job; nptr->jobid = job;
nptr->vpid = (orte_vpid_t)k; nptr->vpid = (orte_vpid_t)k;
nptr++;
} }
*num_procs = (size_t)ptr[j]->next_vpid; *num_procs = (size_t)ptr[j]->next_vpid;

Просмотреть файл

@ -26,6 +26,7 @@
#include "dps/dps_types.h" #include "dps/dps_types.h"
#include "mca/mca.h" #include "mca/mca.h"
#include "mca/ns/ns_types.h" #include "mca/ns/ns_types.h"
#include "mca/gpr/gpr_types.h"
#include "mca/oob/oob_types.h" #include "mca/oob/oob_types.h"
#ifdef HAVE_SYS_UIO_H #ifdef HAVE_SYS_UIO_H
@ -406,7 +407,7 @@ OMPI_DECLSPEC int mca_oob_xcast(
orte_process_name_t* peers, orte_process_name_t* peers,
size_t num_peers, size_t num_peers,
orte_buffer_t* buffer, orte_buffer_t* buffer,
mca_oob_callback_packed_fn_t cbfunc); orte_gpr_trigger_cb_fn_t cbfunc);
#if defined(c_plusplus) || defined(__cplusplus) #if defined(c_plusplus) || defined(__cplusplus)
} }

Просмотреть файл

@ -20,6 +20,7 @@
#include "include/constants.h" #include "include/constants.h"
#include "opal/util/output.h" #include "opal/util/output.h"
#include "util/proc_info.h" #include "util/proc_info.h"
#include "orte/dps/dps.h"
#include "mca/oob/oob.h" #include "mca/oob/oob.h"
#include "mca/oob/base/base.h" #include "mca/oob/base/base.h"
#include "mca/ns/ns.h" #include "mca/ns/ns.h"
@ -46,7 +47,7 @@ int mca_oob_xcast(
orte_process_name_t* peers, orte_process_name_t* peers,
size_t num_peers, size_t num_peers,
orte_buffer_t* buffer, orte_buffer_t* buffer,
mca_oob_callback_packed_fn_t cbfunc) orte_gpr_trigger_cb_fn_t cbfunc)
{ {
size_t i; size_t i;
int rc; int rc;
@ -74,14 +75,29 @@ int mca_oob_xcast(
} }
} else { } else {
orte_buffer_t rbuf; orte_buffer_t rbuf;
orte_gpr_notify_message_t *msg;
OBJ_CONSTRUCT(&rbuf, orte_buffer_t); OBJ_CONSTRUCT(&rbuf, orte_buffer_t);
rc = mca_oob_recv_packed(MCA_OOB_NAME_ANY, &rbuf, tag); rc = mca_oob_recv_packed(MCA_OOB_NAME_ANY, &rbuf, tag);
if(rc < 0) { if(rc < 0) {
OBJ_DESTRUCT(&rbuf); OBJ_DESTRUCT(&rbuf);
return rc; return rc;
} }
if(cbfunc != NULL) if (cbfunc != NULL) {
cbfunc(rc, root, &rbuf, tag, NULL); msg = OBJ_NEW(orte_gpr_notify_message_t);
if (NULL == msg) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
i=1;
if (ORTE_SUCCESS != (rc = orte_dps.unpack(&rbuf, &msg, &i, ORTE_GPR_NOTIFY_MSG))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(msg);
return rc;
}
cbfunc(msg);
OBJ_RELEASE(msg);
}
OBJ_DESTRUCT(&rbuf); OBJ_DESTRUCT(&rbuf);
} }
return ORTE_SUCCESS; return ORTE_SUCCESS;

Просмотреть файл

@ -194,7 +194,7 @@ typedef int (*mca_oob_base_module_xcast_fn_t)(orte_process_name_t* root,
orte_process_name_t* peers, orte_process_name_t* peers,
size_t num_peers, size_t num_peers,
orte_buffer_t* buffer, orte_buffer_t* buffer,
mca_oob_callback_packed_fn_t cbfunc); orte_gpr_trigger_cb_fn_t cbfunc);
/** /**
* OOB Module * OOB Module

Просмотреть файл

@ -616,8 +616,9 @@ int mca_oob_tcp_resolve(mca_oob_tcp_peer_t* peer)
{ {
mca_oob_tcp_addr_t* addr; mca_oob_tcp_addr_t* addr;
mca_oob_tcp_subscription_t* subscription; mca_oob_tcp_subscription_t* subscription;
orte_gpr_trigger_t trig, *trigs; char *segment, *sub_name, *trig_name;
orte_gpr_subscription_t sub, *subs; char *key="oob-tcp";
orte_gpr_subscription_id_t sub_id;
opal_list_item_t* item; opal_list_item_t* item;
int rc; int rc;
@ -642,111 +643,55 @@ int mca_oob_tcp_resolve(mca_oob_tcp_peer_t* peer)
} }
} }
OBJ_CONSTRUCT(&sub, orte_gpr_subscription_t); if (ORTE_SUCCESS != (rc = orte_schema.get_std_subscription_name(&sub_name,
/* indicate that this is a standard subscription. This indicates that the
* subscription will be common to all processes. Thus, the resulting data
* can be consolidated into a process-independent message and broadcast
* to all processes
*/
if (ORTE_SUCCESS != (rc = orte_schema.get_std_subscription_name(&(sub.name),
OMPI_OOB_SUBSCRIPTION, peer->peer_name.jobid))) { OMPI_OOB_SUBSCRIPTION, peer->peer_name.jobid))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
return rc; return rc;
} }
/* send data when trigger fires, continue to monitor. The default
* action for any subscription that includes a trigger condition is
* to send the specified data when the trigger fires. This set of flags
* indicates that - AFTER the trigger fires - the subscription should
* continue to send data any time an entry is added or changed.
*/
sub.action = ORTE_GPR_NOTIFY_ADD_ENTRY |
ORTE_GPR_NOTIFY_VALUE_CHG |
ORTE_GPR_NOTIFY_STARTS_AFTER_TRIG;
/* setup the value structures that describe the data to /* attach to the stage-1 standard trigger */
* be monitored and returned by this subscription if (ORTE_SUCCESS != (rc = orte_schema.get_std_trigger_name(&trig_name,
*/
sub.cnt = 1;
sub.values = (orte_gpr_value_t**)malloc(sizeof(orte_gpr_value_t*));
if (NULL == sub.values) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
sub.values[0] = OBJ_NEW(orte_gpr_value_t);
if (NULL == sub.values[0]) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
sub.cnt = 1;
/* define the segment */
if (ORTE_SUCCESS != (rc = orte_schema.get_job_segment_name(
&(sub.values[0]->segment),
peer->peer_name.jobid))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&sub);
return rc;
}
sub.values[0]->addr_mode = ORTE_GPR_KEYS_OR | ORTE_GPR_TOKENS_OR;
/* look at all containers on this segment */
sub.values[0]->tokens = NULL;
sub.values[0]->num_tokens = 0;
/* look for any keyval with "modex" key */
sub.values[0]->cnt = 1;
sub.values[0]->keyvals = (orte_gpr_keyval_t**)malloc(sizeof(orte_gpr_keyval_t*));
if (NULL == sub.values[0]->keyvals) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
OBJ_DESTRUCT(&sub);
return ORTE_ERR_OUT_OF_RESOURCE;
}
sub.values[0]->keyvals[0] = OBJ_NEW(orte_gpr_keyval_t);
if (NULL == sub.values[0]->keyvals[0]) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
OBJ_DESTRUCT(&sub);
return ORTE_ERR_OUT_OF_RESOURCE;
}
sub.values[0]->keyvals[0]->key = strdup("oob-tcp");
if (NULL == sub.values[0]->keyvals[0]->key) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
OBJ_DESTRUCT(&sub);
return ORTE_ERR_OUT_OF_RESOURCE;
}
/* define the callback function */
sub.cbfunc = mca_oob_tcp_registry_callback;
sub.user_tag = NULL;
/* setup the trigger value */
OBJ_CONSTRUCT(&trig, orte_gpr_trigger_t);
if (ORTE_SUCCESS != (rc = orte_schema.get_std_trigger_name(&(trig.name),
ORTE_STG1_TRIGGER, peer->peer_name.jobid))) { ORTE_STG1_TRIGGER, peer->peer_name.jobid))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
free(sub_name);
return rc; return rc;
} }
/* this is an ORTE-standard trigger that is defined by the ORTE resource manager /* define the segment */
* when the job was launched - therefore, we don't need to provide any additional if (ORTE_SUCCESS != (rc = orte_schema.get_job_segment_name(&segment,
* info peer->peer_name.jobid))) {
*/ ORTE_ERROR_LOG(rc);
free(sub_name);
free(trig_name);
return rc;
}
if (ORTE_SUCCESS != (rc = orte_gpr.subscribe_1(&sub_id, trig_name, sub_name,
ORTE_GPR_NOTIFY_ADD_ENTRY |
ORTE_GPR_NOTIFY_VALUE_CHG |
ORTE_GPR_NOTIFY_STARTS_AFTER_TRIG,
ORTE_GPR_KEYS_OR | ORTE_GPR_TOKENS_OR,
segment,
NULL, /* look at all containers on this segment */
key,
mca_oob_tcp_registry_callback, NULL))) {
ORTE_ERROR_LOG(rc);
free(sub_name);
free(trig_name);
free(segment);
return rc;
}
trigs = &trig;
subs = &sub;
subscription = OBJ_NEW(mca_oob_tcp_subscription_t); subscription = OBJ_NEW(mca_oob_tcp_subscription_t);
subscription->jobid = peer->peer_name.jobid; subscription->jobid = peer->peer_name.jobid;
rc = orte_gpr.subscribe(1, &subs, 1, &trigs); /* the id of each subscription is recorded
if(rc != OMPI_SUCCESS) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&sub);
OBJ_DESTRUCT(&trig);
return rc;
}
/* the id of each subscription is stored by the system in the corresponding
* subscription object we passed into orte_gpr.subscribe. We record it
* here so we can (if desired) cancel that subscription later * here so we can (if desired) cancel that subscription later
*/ */
subscription->subid = sub.id; subscription->subid = sub_id;
/* done with these, so release any memory */ /* done with these, so release any memory */
OBJ_DESTRUCT(&sub); free(trig_name);
OBJ_DESTRUCT(&trig); free(sub_name);
free(segment);
opal_list_append(&mca_oob_tcp_component.tcp_subscriptions, &subscription->item); opal_list_append(&mca_oob_tcp_component.tcp_subscriptions, &subscription->item);
OPAL_THREAD_UNLOCK(&mca_oob_tcp_component.tcp_lock); OPAL_THREAD_UNLOCK(&mca_oob_tcp_component.tcp_lock);
@ -761,13 +706,16 @@ int mca_oob_tcp_init(void)
{ {
orte_jobid_t jobid; orte_jobid_t jobid;
orte_buffer_t *buffer; orte_buffer_t *buffer;
orte_gpr_trigger_t trig, *trigs; orte_gpr_subscription_id_t sub_id;
orte_gpr_value_t *value; char *sub_name, *segment, *trig_name, **tokens;
char *keys[] = {"oob-tcp", ORTE_PROC_RML_IP_ADDRESS_KEY};
orte_data_type_t types[2];
orte_gpr_value_union_t values[2];
mca_oob_tcp_subscription_t *subscription; mca_oob_tcp_subscription_t *subscription;
orte_gpr_subscription_t sub, *subs;
int rc; int rc;
opal_list_item_t* item; opal_list_item_t* item;
char *tmp, *tmp2, *tmp3; char *tmp, *tmp2, *tmp3;
size_t num_tokens;
/* random delay to stagger connections back to seed */ /* random delay to stagger connections back to seed */
#if defined(WIN32) #if defined(WIN32)
@ -807,173 +755,76 @@ int mca_oob_tcp_init(void)
ORTE_NAME_ARGS(orte_process_info.my_name)); ORTE_NAME_ARGS(orte_process_info.my_name));
} }
/* setup the subscription description value */ if (ORTE_SUCCESS != (rc = orte_schema.get_std_subscription_name(&sub_name,
OBJ_CONSTRUCT(&sub, orte_gpr_subscription_t);
/* indicate that this is a standard subscription. This indicates that the
* subscription will be common to all processes. Thus, the resulting data
* can be consolidated into a process-independent message and broadcast
* to all processes
*/
if (ORTE_SUCCESS != (rc = orte_schema.get_std_subscription_name(&(sub.name),
OMPI_OOB_SUBSCRIPTION, jobid))) { OMPI_OOB_SUBSCRIPTION, jobid))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
return rc; return rc;
} }
/* send data when trigger fires, continue to monitor. The default
* action for any subscription that includes a trigger condition is
* to send the specified data when the trigger fires. This set of flags
* indicates that - AFTER the trigger fires - the subscription should
* continue to send data any time an entry is added or changed.
*/
sub.action = ORTE_GPR_NOTIFY_ADD_ENTRY |
ORTE_GPR_NOTIFY_VALUE_CHG |
ORTE_GPR_NOTIFY_STARTS_AFTER_TRIG;
/* setup the value structures that describe the data to /* attach to the stage-1 standard trigger */
* be monitored and returned by this subscription if (ORTE_SUCCESS != (rc = orte_schema.get_std_trigger_name(&trig_name,
*/ ORTE_STG1_TRIGGER, jobid))) {
sub.cnt = 1; ORTE_ERROR_LOG(rc);
sub.values = (orte_gpr_value_t**)malloc(sizeof(orte_gpr_value_t*)); free(sub_name);
if (NULL == sub.values) { return rc;
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
sub.values[0] = OBJ_NEW(orte_gpr_value_t);
if (NULL == sub.values[0]) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
} }
/* define the segment */ /* define the segment */
if (ORTE_SUCCESS != (rc = orte_schema.get_job_segment_name( if (ORTE_SUCCESS != (rc = orte_schema.get_job_segment_name(&segment, jobid))) {
&(sub.values[0]->segment),
jobid))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&sub);
return rc;
}
sub.values[0]->addr_mode = ORTE_GPR_KEYS_OR | ORTE_GPR_TOKENS_OR;
/* look at all containers on this segment */
sub.values[0]->tokens = NULL;
sub.values[0]->num_tokens = 0;
/* look for any keyval with "modex" key */
sub.values[0]->cnt = 1;
sub.values[0]->keyvals = (orte_gpr_keyval_t**)malloc(sizeof(orte_gpr_keyval_t*));
if (NULL == sub.values[0]->keyvals) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
OBJ_DESTRUCT(&sub);
return ORTE_ERR_OUT_OF_RESOURCE;
}
sub.values[0]->keyvals[0] = OBJ_NEW(orte_gpr_keyval_t);
if (NULL == sub.values[0]->keyvals[0]) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
OBJ_DESTRUCT(&sub);
return ORTE_ERR_OUT_OF_RESOURCE;
}
sub.values[0]->keyvals[0]->key = strdup("oob-tcp");
if (NULL == sub.values[0]->keyvals[0]->key) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
OBJ_DESTRUCT(&sub);
return ORTE_ERR_OUT_OF_RESOURCE;
}
/* define the callback function */
sub.cbfunc = mca_oob_tcp_registry_callback;
sub.user_tag = NULL;
/* setup the trigger value */
OBJ_CONSTRUCT(&trig, orte_gpr_trigger_t);
if (ORTE_SUCCESS != (rc = orte_schema.get_std_trigger_name(&(trig.name),
ORTE_STG1_TRIGGER, jobid))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
free(sub_name);
free(trig_name);
return rc; return rc;
} }
/* this is an ORTE-standard trigger that is defined by the ORTE resource manager if (ORTE_SUCCESS != (rc = orte_gpr.subscribe_1(&sub_id, trig_name, sub_name,
* when the job was launched - therefore, we don't need to provide any additional ORTE_GPR_NOTIFY_ADD_ENTRY |
* info ORTE_GPR_NOTIFY_VALUE_CHG |
*/ ORTE_GPR_NOTIFY_STARTS_AFTER_TRIG,
ORTE_GPR_KEYS_OR | ORTE_GPR_TOKENS_OR,
segment,
trigs = &trig; NULL, /* look at all containers on this segment */
subs = &sub; keys[0],
subscription = OBJ_NEW(mca_oob_tcp_subscription_t); mca_oob_tcp_registry_callback, NULL))) {
subscription->jobid = jobid;
rc = orte_gpr.subscribe(1, &subs, 1, &trigs);
if(rc != OMPI_SUCCESS) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&sub); free(sub_name);
OBJ_DESTRUCT(&trig); free(trig_name);
free(segment);
return rc; return rc;
} }
/* the id of each subscription is stored by the system in the corresponding /* the id of each subscription is recorded
* subscription object we passed into orte_gpr.subscribe. We record it
* here so we can (if desired) cancel that subscription later * here so we can (if desired) cancel that subscription later
*/ */
subscription->subid = sub.id; subscription->subid = sub_id;
/* done with these, so release any memory */ /* done with these, so release any memory */
OBJ_DESTRUCT(&sub); free(trig_name);
OBJ_DESTRUCT(&trig); free(sub_name);
/* now setup to put our contact info on registry */
buffer = OBJ_NEW(orte_buffer_t); buffer = OBJ_NEW(orte_buffer_t);
if(buffer == NULL) { if(buffer == NULL) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return OMPI_ERR_OUT_OF_RESOURCE; return ORTE_ERR_OUT_OF_RESOURCE;
} }
rc = mca_oob_tcp_addr_pack(buffer); if (ORTE_SUCCESS != (rc = mca_oob_tcp_addr_pack(buffer))) {
if(rc != OMPI_SUCCESS) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
OBJ_RELEASE(buffer); OBJ_RELEASE(buffer);
return rc; return rc;
} }
/* put our contact info in registry */ /* extract payload for storage */
value = OBJ_NEW(orte_gpr_value_t); types[0] = ORTE_BYTE_OBJECT;
if (NULL == value) { if (ORTE_SUCCESS != (rc = orte_dps.unload(buffer, (void**)&(values[0].byteobject.bytes),
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); &(values[0].byteobject.size)))) {
return ORTE_ERR_OUT_OF_RESOURCE;
}
value->addr_mode = ORTE_GPR_OVERWRITE | ORTE_GPR_TOKENS_XAND;
if (ORTE_SUCCESS != (rc = orte_schema.get_job_segment_name(&(value->segment), jobid))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
return rc; free(segment);
}
value->cnt = 2;
value->keyvals = (orte_gpr_keyval_t**)malloc(value->cnt * sizeof(orte_gpr_keyval_t*));
if(NULL == value->keyvals) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
value->keyvals[0] = OBJ_NEW(orte_gpr_keyval_t);
if (NULL == value->keyvals[0]) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
value->keyvals[1] = OBJ_NEW(orte_gpr_keyval_t);
if (NULL == value->keyvals[1]) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
if (ORTE_SUCCESS != (rc = orte_schema.get_proc_tokens(&(value->tokens),
&(value->num_tokens), orte_process_info.my_name))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(value);
return rc;
}
(value->keyvals[0])->type = ORTE_BYTE_OBJECT;
(value->keyvals[0])->key = strdup("oob-tcp");
rc = orte_dps.unload(buffer, (void**)&(value->keyvals[0])->value.byteobject.bytes,
&(value->keyvals[0])->value.byteobject.size);
if(rc != ORTE_SUCCESS) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(value);
OBJ_RELEASE(buffer); OBJ_RELEASE(buffer);
return rc; return rc;
} }
OBJ_RELEASE(buffer);
(value->keyvals[1])->type = ORTE_STRING; /* setup the IP address for storage */
(value->keyvals[1])->key = strdup(ORTE_PROC_RML_IP_ADDRESS_KEY);
tmp = mca_oob.oob_get_addr(); tmp = mca_oob.oob_get_addr();
tmp2 = strrchr(tmp, '/') + 1; tmp2 = strrchr(tmp, '/') + 1;
tmp3 = strrchr(tmp, ':'); tmp3 = strrchr(tmp, ':');
@ -982,33 +833,37 @@ int mca_oob_tcp_init(void)
"returned for selected oob interfaces.\n", "returned for selected oob interfaces.\n",
ORTE_NAME_ARGS(orte_process_info.my_name), tmp); ORTE_NAME_ARGS(orte_process_info.my_name), tmp);
ORTE_ERROR_LOG(ORTE_ERROR); ORTE_ERROR_LOG(ORTE_ERROR);
free(segment);
free(tmp);
free(values[0].byteobject.bytes);
return ORTE_ERROR; return ORTE_ERROR;
} }
*tmp3 = '\0'; *tmp3 = '\0';
(value->keyvals[1])->value.strptr = strdup(tmp2); types[1] = ORTE_STRING;
values[1].strptr = strdup(tmp2);
free(tmp); free(tmp);
if(mca_oob_tcp_component.tcp_debug > 2) { /* get the process tokens */
opal_output(0, "[%lu,%lu,%lu] mca_oob_tcp_init: calling orte_gpr.put(%s)\n", if (ORTE_SUCCESS != (rc = orte_schema.get_proc_tokens(&tokens, &num_tokens,
ORTE_NAME_ARGS(orte_process_info.my_name), orte_process_info.my_name))) {
value->segment);
}
rc = orte_gpr.put(1, &value);
if(rc != OMPI_SUCCESS) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
OBJ_RELEASE(value); free(segment);
OBJ_RELEASE(buffer); free(values[0].byteobject.bytes);
free(values[1].strptr);
return rc; return rc;
} }
OBJ_RELEASE(buffer);
OBJ_RELEASE(value);
if(rc != ORTE_SUCCESS) { /* put our contact info in registry */
if (ORTE_SUCCESS != (rc = orte_gpr.put_N(ORTE_GPR_OVERWRITE | ORTE_GPR_TOKENS_XAND,
segment, tokens, 2, keys, types, values))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
return rc;
} }
return OMPI_SUCCESS;
free(segment);
free(values[0].byteobject.bytes);
free(values[1].strptr);
return rc;
} }
/* /*

Просмотреть файл

@ -115,12 +115,10 @@ int orte_rmgr_base_terminate_job_not_available(orte_jobid_t);
int orte_rmgr_base_terminate_proc_not_available(const orte_process_name_t*); int orte_rmgr_base_terminate_proc_not_available(const orte_process_name_t*);
int orte_rmgr_base_proc_stage_gate_init(orte_jobid_t job); int orte_rmgr_base_proc_stage_gate_init(orte_jobid_t job);
int orte_rmgr_base_proc_stage_gate_subscribe(orte_jobid_t job, orte_gpr_notify_cb_fn_t, void*); int orte_rmgr_base_proc_stage_gate_subscribe(orte_jobid_t job, orte_gpr_notify_cb_fn_t, void*);
void orte_rmgr_base_proc_stage_gate_mgr( int orte_rmgr_base_proc_stage_gate_mgr(
orte_gpr_notify_data_t *data, orte_gpr_notify_message_t *msg);
void *user_tag); int orte_rmgr_base_proc_stage_gate_mgr_abort(
void orte_rmgr_base_proc_stage_gate_mgr_abort( orte_gpr_notify_message_t *msg);
orte_gpr_notify_data_t *data,
void *user_tag);
int orte_rmgr_base_spawn_not_available( int orte_rmgr_base_spawn_not_available(
orte_app_context_t** app_context, orte_app_context_t** app_context,
size_t num_context, size_t num_context,

Просмотреть файл

@ -30,6 +30,7 @@
#include "orte/dps/dps.h" #include "orte/dps/dps.h"
#include "orte/mca/gpr/gpr.h" #include "orte/mca/gpr/gpr.h"
#include "orte/mca/ns/ns.h"
#include "orte/mca/errmgr/errmgr.h" #include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/rml/rml.h" #include "orte/mca/rml/rml.h"
#include "orte/mca/soh/soh.h" #include "orte/mca/soh/soh.h"
@ -41,9 +42,7 @@ int orte_rmgr_base_proc_stage_gate_init(orte_jobid_t job)
{ {
size_t i, num_counters=6, num_named_trigs=5; size_t i, num_counters=6, num_named_trigs=5;
int rc; int rc;
orte_gpr_value_t *values, value, trigvalue, *trigvals; orte_gpr_value_t *values, value;
orte_gpr_trigger_t trig, *trigs;
orte_gpr_subscription_t sub, *subs;
char* keys[] = { char* keys[] = {
/* changes to this ordering need to be reflected in code below */ /* changes to this ordering need to be reflected in code below */
ORTE_PROC_NUM_AT_STG1, ORTE_PROC_NUM_AT_STG1,
@ -61,6 +60,9 @@ int orte_rmgr_base_proc_stage_gate_init(orte_jobid_t job)
ORTE_NUM_FINALIZED_TRIGGER, ORTE_NUM_FINALIZED_TRIGGER,
ORTE_NUM_TERMINATED_TRIGGER ORTE_NUM_TERMINATED_TRIGGER
}; };
char *segment, *trig_name, *tokens[2], *trig_keys[2];
orte_gpr_trigger_id_t id;
size_t trig_level;
/* setup the counters */ /* setup the counters */
OBJ_CONSTRUCT(&value, orte_gpr_value_t); OBJ_CONSTRUCT(&value, orte_gpr_value_t);
@ -106,486 +108,171 @@ int orte_rmgr_base_proc_stage_gate_init(orte_jobid_t job)
} }
OBJ_DESTRUCT(&value); OBJ_DESTRUCT(&value);
/* for the stage gate triggers, we want the counter values returned to us AND /*** DEFINE STAGE GATE STANDARD TRIGGERS ***/
* information on VPID_START so we can generate the list of peers /* The standard triggers will return the trigger counters so that we
* to receive the xcast messages for barrier release. * can get required information for notifying processes. Other
* subscriptions will then attach to them.
*/ */
if (ORTE_SUCCESS != (rc = orte_schema.get_job_segment_name(&segment, job))) {
/*** SUBSCRIPTIONS ***/
/* the subscription object is used to define the values we want
* returned to us. we'll enter the precise data
* keys when we are ready to register the subscription - for now,
* do all the basic stuff
*/
OBJ_CONSTRUCT(&sub, orte_gpr_subscription_t);
/* we do not name the subscription - see explanation below. also, we do
* not assign the subscription id here - it is assigned for us when the
* registry "registers" the subscription and is returned in the
* subscription object at that time
*/
/*
* set the action to delete the subscription after the trigger fires. this
* subscription is solely for the purpose of returning stagegate information
* to the resource manager - we don't need it after that happens
*/
sub.action = ORTE_GPR_NOTIFY_DELETE_AFTER_TRIG;
/*
* setup the value object to define the data to be returned to us
*/
OBJ_CONSTRUCT(&value, orte_gpr_value_t);
values = &value;
sub.values = &values;
sub.cnt = 1;
/* set the address mode to identify a specific container (in this case,
* the ORTE_JOB_GLOBALS container) and any keys within it
*/
value.addr_mode = ORTE_GPR_TOKENS_XAND | ORTE_GPR_KEYS_OR;
if (ORTE_SUCCESS != (rc = orte_schema.get_job_segment_name(&(value.segment), job))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&value);
sub.values = NULL;
OBJ_DESTRUCT(&sub);
return rc; return rc;
} }
/* define the tokens for the container */ tokens[0] = strdup(ORTE_JOB_GLOBALS);
value.tokens = (char**)malloc(sizeof(char*)); tokens[1] = NULL;
if (NULL == value.tokens) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
OBJ_DESTRUCT(&value);
sub.values = NULL;
OBJ_DESTRUCT(&sub);
return ORTE_ERR_OUT_OF_RESOURCE;
}
value.tokens[0] = strdup(ORTE_JOB_GLOBALS); /* the counters are in the job's globals container */
value.num_tokens = 1;
/* define the keys to be returned */
value.cnt = 3;
value.keyvals = (orte_gpr_keyval_t**)malloc(value.cnt * sizeof(orte_gpr_keyval_t*));
if (NULL == value.keyvals) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
OBJ_DESTRUCT(&value);
sub.values = NULL;
OBJ_DESTRUCT(&sub);
return ORTE_ERR_OUT_OF_RESOURCE;
}
for (i=0; i < value.cnt; i++) {
value.keyvals[i] = OBJ_NEW(orte_gpr_keyval_t);
if (NULL == value.keyvals[i]) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
OBJ_DESTRUCT(&value);
sub.values = NULL;
OBJ_DESTRUCT(&sub);
return ORTE_ERR_OUT_OF_RESOURCE;
}
}
/* the 0th entry will be defined below */
value.keyvals[1]->key = strdup(ORTE_JOB_SLOTS_KEY);
value.keyvals[2]->key = strdup(ORTE_JOB_VPID_START_KEY);
/* we don't need to define the type and value for the keyvals - the subscribe
* function ignores those fields
*/
sub.cbfunc = orte_rmgr_base_proc_stage_gate_mgr; trig_keys[0] = strdup(ORTE_JOB_SLOTS_KEY);
sub.user_tag = NULL;
/*** TRIGGERS ***/
/* setup the trigger information - initialize the common elements */
OBJ_CONSTRUCT(&trig, orte_gpr_trigger_t);
/* we WILL name the trig - see explanation below. we do
* NOT assign the trigger id here - it is assigned for us when the
* registry "registers" the trigger and is returned in the
* trigger object at that time
*/
/*
* set the action to compare all specified counter levels. this will
* "fire" the trigger when all counters are equal
*/
trig.action = ORTE_GPR_TRIG_ALL_CMP;
/*
* setup the value object to define the data to be returned to us
*/
OBJ_CONSTRUCT(&trigvalue, orte_gpr_value_t);
trigvals = &trigvalue;
trig.values = &trigvals;
trig.cnt = 1;
/* set the address mode to identify a specific container (in this case,
* the ORTE_JOB_GLOBALS container) and any keys within it
*/
trigvalue.addr_mode = ORTE_GPR_TOKENS_XAND | ORTE_GPR_KEYS_OR;
if (ORTE_SUCCESS != (rc = orte_schema.get_job_segment_name(&(trigvalue.segment), job))) {
ORTE_ERROR_LOG(rc);
goto CLEANUP;
}
/* define the tokens for the container */
trigvalue.tokens = (char**)malloc(sizeof(char*));
if (NULL == trigvalue.tokens) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
rc = ORTE_ERR_OUT_OF_RESOURCE;
goto CLEANUP;
}
trigvalue.tokens[0] = strdup(ORTE_JOB_GLOBALS); /* the counters are in the job's globals container */
trigvalue.num_tokens = 1;
/* define the keys that identify the counters */
trigvalue.cnt = 2;
trigvalue.keyvals = (orte_gpr_keyval_t**)malloc(trigvalue.cnt * sizeof(orte_gpr_keyval_t*));
if (NULL == trigvalue.keyvals) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
rc = ORTE_ERR_OUT_OF_RESOURCE;
goto CLEANUP;
}
trigvalue.keyvals[0] = OBJ_NEW(orte_gpr_keyval_t);
if (NULL == trigvalue.keyvals[0]) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
rc = ORTE_ERR_OUT_OF_RESOURCE;
goto CLEANUP;
}
trigvalue.keyvals[1] = OBJ_NEW(orte_gpr_keyval_t);
if (NULL == trigvalue.keyvals[1]) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
rc = ORTE_ERR_OUT_OF_RESOURCE;
goto CLEANUP;
}
/* setup the triggers for the three main stage gates - these all compare
* their value to that in ORTE_JOB_SLOTS_KEY
*/
trigvalue.keyvals[0]->key = strdup(ORTE_JOB_SLOTS_KEY);
if (NULL == trigvalue.keyvals[0]->key) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
rc = ORTE_ERR_OUT_OF_RESOURCE;
goto CLEANUP;
}
/* we don't need to define the type and value for the keyvals - the subscribe
* function ignores those fields
*/
/* do the three stage gate subscriptions, plus the named triggers
* that compare their values to the JOB_SLOTS_KEY
*/
for (i=0; i < num_named_trigs; i++) { for (i=0; i < num_named_trigs; i++) {
/* trig_keys[1] = strdup(keys[i]);
* NOTE: we do NOT name the subscriptions here as these are not if (ORTE_SUCCESS != (rc = orte_schema.get_std_trigger_name(&trig_name,
* standard subscriptions that multiple processes should attach
* themselves to - the subscriptions only have meaning to the
* resource manager
*/
value.keyvals[0]->key = strdup(keys[i]);
if (NULL == value.keyvals[0]->key) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
rc = ORTE_ERR_OUT_OF_RESOURCE;
goto CLEANUP;
}
/*
* NOTE: we DO name the triggers as these will be standard triggers
* that multiple processes will want to attach themselves to - for
* example, a process may well want to receive some information when
* it reaches STAGE_GATE_1, and so will "attach" itself to that
* trigger as defined by us here
*/
if (ORTE_SUCCESS != (rc = orte_schema.get_std_trigger_name(&(trig.name),
trig_names[i], job))) { trig_names[i], job))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
goto CLEANUP; free(trig_keys[0]);
free(trig_keys[1]);
return rc;
} }
trigvalue.keyvals[1]->key = strdup(keys[i]); if (ORTE_SUCCESS != (rc = orte_gpr.define_trigger(&id, trig_name,
if (NULL == trigvalue.keyvals[1]->key) { ORTE_GPR_TRIG_INCLUDE_TRIG_CNTRS | ORTE_GPR_TRIG_ONE_SHOT |
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); ORTE_GPR_TRIG_ROUTE_DATA_THRU_ME | ORTE_GPR_TRIG_CMP_LEVELS,
rc = ORTE_ERR_OUT_OF_RESOURCE; ORTE_GPR_TOKENS_XAND | ORTE_GPR_KEYS_OR,
goto CLEANUP; segment, tokens, 2, trig_keys,
} orte_rmgr_base_proc_stage_gate_mgr, NULL))) {
subs = &sub;
trigs = &trig;
rc = orte_gpr.subscribe(
1, &subs,
1, &trigs);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
goto CLEANUP; free(trig_name);
free(trig_keys[0]);
free(trig_keys[1]);
return rc;
} }
free(value.keyvals[0]->key); free(trig_name);
value.keyvals[0]->key = NULL; free(trig_keys[1]);
free(trig.name);
free(trigvalue.keyvals[1]->key);
trigvalue.keyvals[1]->key = NULL;
} }
free(trig_keys[0]);
/* Next, setup the trigger that watches the NUM_ABORTED counter to see if /* Now define the abort trigger. Again, only the trigger counter needs
* any process abnormally terminates - if so, then call the * to be returned, so we don't need to setup a subscription to get
* stage_gate_mgr_abort function * other information
* so it can in turn order the job to be aborted
*/ */
sub.cbfunc = orte_rmgr_base_proc_stage_gate_mgr_abort; trig_keys[0] = strdup(ORTE_PROC_NUM_ABORTED);
value.keyvals[0]->key = strdup(ORTE_PROC_NUM_ABORTED); if (ORTE_SUCCESS != (rc = orte_schema.get_std_trigger_name(&trig_name,
if (NULL == value.keyvals[0]->key) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
rc = ORTE_ERR_OUT_OF_RESOURCE;
goto CLEANUP;
}
/* set the trigger name */
if (ORTE_SUCCESS != (rc = orte_schema.get_std_trigger_name(&(trig.name),
ORTE_NUM_ABORTED_TRIGGER, job))) { ORTE_NUM_ABORTED_TRIGGER, job))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
goto CLEANUP; return rc;
} }
/* set the trigger action to fire at a specified level */ trig_level = 1;
trig.action = ORTE_GPR_TRIG_ALL_AT; if (ORTE_SUCCESS != (rc = orte_gpr.define_trigger_level(&id, trig_name,
/* cleanup the trigger keyvals that are no longer needed - we will ORTE_GPR_TRIG_INCLUDE_TRIG_CNTRS | ORTE_GPR_TRIG_ONE_SHOT |
* rebuild them as required ORTE_GPR_TRIG_AT_LEVEL,
*/ ORTE_GPR_TOKENS_XAND | ORTE_GPR_KEYS_OR,
OBJ_RELEASE(trigvalue.keyvals[0]); segment, tokens, 1, trig_keys, &trig_level,
OBJ_RELEASE(trigvalue.keyvals[1]); orte_rmgr_base_proc_stage_gate_mgr_abort, NULL))) {
free(trigvalue.keyvals);
/* we only need one trigger keyval here as we are not comparing
* trigger levels - we are just asking to be notified when
* a specific counter changes value to "1"
*/
trigvalue.cnt = 1;
trigvalue.keyvals = (orte_gpr_keyval_t**)malloc(sizeof(orte_gpr_keyval_t**));
if (NULL == trigvalue.keyvals) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
rc = ORTE_ERR_OUT_OF_RESOURCE;
goto CLEANUP;
}
trigvalue.keyvals[0] = OBJ_NEW(orte_gpr_keyval_t);
if (NULL == trigvalue.keyvals[0]) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
rc = ORTE_ERR_OUT_OF_RESOURCE;
goto CLEANUP;
}
trigvalue.keyvals[0]->key = strdup(ORTE_PROC_NUM_ABORTED);
if (NULL == trigvalue.keyvals[0]->key) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
rc = ORTE_ERR_OUT_OF_RESOURCE;
goto CLEANUP;
}
/* trigger on the first process that aborts */
trigvalue.keyvals[0]->type = ORTE_SIZE;
trigvalue.keyvals[0]->value.size = 1;
subs = &sub;
trigs = &trig;
rc = orte_gpr.subscribe(
1, &subs,
1, &trigs);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
goto CLEANUP; free(trig_name);
free(trig_keys[0]);
return rc;
} }
free(trig_name);
free(trig_keys[0]);
/* set the job state to "launched" */ /* set the job state to "launched" */
if (ORTE_SUCCESS != (rc = orte_soh.set_job_soh(job, ORTE_JOB_STATE_LAUNCHED))) { if (ORTE_SUCCESS != (rc = orte_soh.set_job_soh(job, ORTE_JOB_STATE_LAUNCHED))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
} }
CLEANUP:
OBJ_DESTRUCT(&trigvalue);
trig.values = NULL;
OBJ_DESTRUCT(&trig);
OBJ_DESTRUCT(&value);
sub.values = NULL;
OBJ_DESTRUCT(&sub);
return rc; return rc;
} }
void orte_rmgr_base_proc_stage_gate_mgr(orte_gpr_notify_data_t *data, int orte_rmgr_base_proc_stage_gate_mgr(orte_gpr_notify_message_t *msg)
void *user_tag)
{ {
orte_gpr_value_t **values; orte_buffer_t buffer;
orte_gpr_keyval_t **kvals;
orte_process_name_t *recipients=NULL; orte_process_name_t *recipients=NULL;
size_t i, j, m, n=0; size_t n=0;
orte_vpid_t k=0;
int rc; int rc;
bool found_slots=false, found_start=false;
bool found_stg1=false, found_stg2=false;
bool found_stg3=false, found_finalized=false;
orte_buffer_t msg;
orte_jobid_t job; orte_jobid_t job;
char **tokens=NULL;
size_t num_tokens;
values = (orte_gpr_value_t**)(data->values)->addr; /* check to see if this came from terminate. If so, we ignore it because
* that stage gate does NOT set an xcast barrier - processes simply
/* get the jobid from the segment name * record their state and continue processing
* we setup the stage gate triggers to return at least one value
* to us. we use that value to extract the jobid for the returned
* data
*/ */
if (ORTE_SUCCESS != (rc = if (orte_schema.check_std_trigger_name(msg->target, ORTE_NUM_TERMINATED_TRIGGER)) {
orte_schema.extract_jobid_from_segment_name(&job, return ORTE_SUCCESS;
values[0]->segment))) {
ORTE_ERROR_LOG(rc);
return;
} }
if (ORTE_SUCCESS != (rc = orte_schema.get_job_tokens(&tokens, &num_tokens, job))) { /* All stage gate triggers are named, so we can extract the jobid
ORTE_ERROR_LOG(rc); * directly from the trigger name
return;
}
/* check to see if this came from one of the stage gates as opposed
* to either terminate or finalize - if the latter, we set the job
* state as appropriate and then return - no message needs to be
* sent to the processes themselves
*/ */
kvals = values[0]->keyvals; if (ORTE_SUCCESS != (rc = orte_schema.extract_jobid_from_std_trigger_name(&job, msg->target))) {
for (i=0; i < values[0]->cnt; i++) {
if (0 == strcmp(kvals[i]->key, ORTE_PROC_NUM_TERMINATED)) {
if (ORTE_SUCCESS != (rc = orte_soh.set_job_soh(job, ORTE_JOB_STATE_TERMINATED))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
} return rc;
goto CLEANUP;
}
} }
/* value returned will contain the counter, which contains the number of /* need the list of peers for this job so we can send them the xcast.
* procs in this job. We need to know which counter is included as this * obtain this list from the name service's get_job_peers function
* tells us the job state we have reached.
*/ */
for (i=0, m=0; m < data->cnt && if (ORTE_SUCCESS != (rc = orte_ns.get_job_peers(&recipients, &n, job))) {
i < (data->values)->size && ORTE_ERROR_LOG(rc);
(!found_slots || !found_start || return rc;
(!found_stg1 && !found_stg2 && !found_stg3 && !found_finalized)); i++) {
if (NULL != values[i]) {
m++;
kvals = values[i]->keyvals;
/* check to see if ORTE_JOB_GLOBALS is the token */
if (NULL != values[i]->tokens &&
0 == strcmp(ORTE_JOB_GLOBALS, values[i]->tokens[0])) {
/* find the ORTE_JOB_SLOTS_KEY and the ORTE_JOB_VPID_START_KEY keyval */
for (j=0; j < values[i]->cnt &&
(!found_slots || !found_start ||
(!found_stg1 && !found_stg2 && !found_stg3 && !found_finalized)); j++) {
if (NULL != kvals[j] && !found_slots &&
0 == strcmp(ORTE_JOB_SLOTS_KEY, kvals[j]->key)) {
n = kvals[j]->value.size;
found_slots = true;
}
if (NULL != kvals[j] && !found_start &&
0 == strcmp(ORTE_JOB_VPID_START_KEY, kvals[j]->key)) {
k = kvals[j]->value.vpid;
found_start = true;
}
if (NULL != kvals[j] &&
0 == strcmp(ORTE_PROC_NUM_AT_STG1, kvals[j]->key)) {
found_stg1 = true;
} else if (NULL != kvals[j] &&
0 == strcmp(ORTE_PROC_NUM_AT_STG2, kvals[j]->key)) {
found_stg2 = true;
} else if (NULL != kvals[j] &&
0 == strcmp(ORTE_PROC_NUM_AT_STG3, kvals[j]->key)) {
found_stg3 = true;
} else if (NULL != kvals[j] &&
0 == strcmp(ORTE_PROC_NUM_FINALIZED, kvals[j]->key)) {
found_finalized = true;
}
}
}
}
}
if (!found_slots) {
ORTE_ERROR_LOG(ORTE_ERR_GPR_DATA_CORRUPT);
goto CLEANUP;
}
if (!found_start) {
ORTE_ERROR_LOG(ORTE_ERR_GPR_DATA_CORRUPT);
goto CLEANUP;
} }
/* set the job state to the appropriate level */ /* set the job state to the appropriate level */
if (found_stg1) { if (orte_schema.check_std_trigger_name(msg->target, ORTE_STG1_TRIGGER)) {
if (ORTE_SUCCESS != (rc = orte_soh.set_job_soh(job, ORTE_JOB_STATE_AT_STG1))) { if (ORTE_SUCCESS != (rc = orte_soh.set_job_soh(job, ORTE_JOB_STATE_AT_STG1))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
goto CLEANUP; goto CLEANUP;
} }
} else if (found_stg2) { } else if (orte_schema.check_std_trigger_name(msg->target, ORTE_STG2_TRIGGER)) {
if (ORTE_SUCCESS != (rc = orte_soh.set_job_soh(job, ORTE_JOB_STATE_AT_STG2))) { if (ORTE_SUCCESS != (rc = orte_soh.set_job_soh(job, ORTE_JOB_STATE_AT_STG2))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
goto CLEANUP; goto CLEANUP;
} }
} else if (found_stg3) { } else if (orte_schema.check_std_trigger_name(msg->target, ORTE_STG3_TRIGGER)) {
if (ORTE_SUCCESS != (rc = orte_soh.set_job_soh(job, ORTE_JOB_STATE_AT_STG3))) { if (ORTE_SUCCESS != (rc = orte_soh.set_job_soh(job, ORTE_JOB_STATE_AT_STG3))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
goto CLEANUP; goto CLEANUP;
} }
} else if (found_finalized) { } else if (orte_schema.check_std_trigger_name(msg->target, ORTE_NUM_FINALIZED_TRIGGER)) {
if (ORTE_SUCCESS != (rc = orte_soh.set_job_soh(job, ORTE_JOB_STATE_FINALIZED))) { if (ORTE_SUCCESS != (rc = orte_soh.set_job_soh(job, ORTE_JOB_STATE_FINALIZED))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
goto CLEANUP; goto CLEANUP;
} }
} }
/* now can generate the list of recipients */ /* set the message type to SUBSCRIPTION. When we give this to the processes, we want
recipients = (orte_process_name_t*)malloc(n * sizeof(orte_process_name_t)); * them to break the message down and deliver it to the various subsystems.
for (i=0; i < n; i++) {
recipients[i].cellid = 0;
recipients[i].jobid = job;
recipients[i].vpid = (orte_vpid_t)(k + i);
}
/* for the purposes of the stage gate manager, we don't actually have
* to determine anything from the message. All we have to do is respond
* by sending an xcast to all processes. However, the buffer has to include
* at least one piece of data for the RML to function, so pack something
* meaningless.
*/ */
msg->msg_type = ORTE_GPR_SUBSCRIPTION_MSG;
msg->id = ORTE_GPR_TRIGGER_ID_MAX;
OBJ_CONSTRUCT(&msg, orte_buffer_t); /* need to pack the msg for sending */
if (ORTE_SUCCESS != (rc = orte_dps.pack(&msg, &job, 1, ORTE_JOBID))) { OBJ_CONSTRUCT(&buffer, orte_buffer_t);
if (ORTE_SUCCESS != (rc = orte_dps.pack(&buffer, &msg, 1, ORTE_GPR_NOTIFY_MSG))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&msg); OBJ_DESTRUCT(&buffer);
goto CLEANUP; goto CLEANUP;
} }
/* send the message */
if (ORTE_SUCCESS != (rc = orte_rml.xcast(orte_process_info.my_name, recipients, if (ORTE_SUCCESS != (rc = orte_rml.xcast(orte_process_info.my_name, recipients,
n, &msg, NULL))) { n, &buffer, NULL, NULL))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&msg);
goto CLEANUP;
} }
OBJ_DESTRUCT(&msg); OBJ_DESTRUCT(&buffer);
CLEANUP: CLEANUP:
for (j=0; j < num_tokens; j++) {
free(tokens[j]);
tokens[j] = NULL;
}
if (NULL != tokens) free(tokens);
if (NULL != recipients) free(recipients); if (NULL != recipients) free(recipients);
return; return rc;
} }
void orte_rmgr_base_proc_stage_gate_mgr_abort(orte_gpr_notify_data_t *data, int orte_rmgr_base_proc_stage_gate_mgr_abort(orte_gpr_notify_message_t *msg)
void *user_tag)
{ {
orte_gpr_value_t **values;
orte_jobid_t job; orte_jobid_t job;
int rc; int rc;
/* get the jobid from the segment name /* All stage gate triggers are named, so we can extract the jobid
* we setup the stage gate triggers to return at least one value * directly from the trigger name
* to us. we use that value to extract the jobid for the returned
* data
*/ */
values = (orte_gpr_value_t**)(data->values)->addr; if (ORTE_SUCCESS != (rc = orte_schema.extract_jobid_from_std_trigger_name(&job, msg->target))) {
if (ORTE_SUCCESS != (rc =
orte_schema.extract_jobid_from_segment_name(&job,
values[0]->segment))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
return; return rc;
} }
/* set the job status to "aborted" */ /* set the job status to "aborted" */
@ -595,27 +282,30 @@ void orte_rmgr_base_proc_stage_gate_mgr_abort(orte_gpr_notify_data_t *data,
} }
orte_errmgr.incomplete_start(job); orte_errmgr.incomplete_start(job);
return ORTE_SUCCESS;
} }
/* /*
* Routine that subscribes to events on all counters. * Routine that tools such as orterun can use to subscribe
* to events on all counters.
*/ */
int orte_rmgr_base_proc_stage_gate_subscribe(orte_jobid_t job, orte_gpr_notify_cb_fn_t cbfunc, void* cbdata) int orte_rmgr_base_proc_stage_gate_subscribe(orte_jobid_t job, orte_gpr_notify_cb_fn_t cbfunc, void* cbdata)
{ {
size_t i; size_t i;
int rc; int rc;
orte_gpr_value_t value, *values; char *segment, *trig_name, *tokens[2];
orte_gpr_trigger_t trig, *trigs; orte_gpr_subscription_id_t id;
orte_gpr_subscription_t sub, *subs;
char* keys[] = { char* keys[] = {
/* changes to this ordering need to be reflected in code below */ /* changes to this ordering need to be reflected in code below */
ORTE_PROC_NUM_AT_STG1, ORTE_PROC_NUM_AT_STG1,
ORTE_PROC_NUM_AT_STG2, ORTE_PROC_NUM_AT_STG2,
ORTE_PROC_NUM_AT_STG3, ORTE_PROC_NUM_AT_STG3,
ORTE_PROC_NUM_FINALIZED, ORTE_PROC_NUM_FINALIZED,
ORTE_PROC_NUM_TERMINATED ORTE_PROC_NUM_TERMINATED,
ORTE_PROC_NUM_ABORTED
}; };
char* trig_names[] = { char* trig_names[] = {
/* changes to this ordering need to be reflected in code below /* changes to this ordering need to be reflected in code below
@ -625,160 +315,45 @@ int orte_rmgr_base_proc_stage_gate_subscribe(orte_jobid_t job, orte_gpr_notify_c
ORTE_STG2_TRIGGER, ORTE_STG2_TRIGGER,
ORTE_STG3_TRIGGER, ORTE_STG3_TRIGGER,
ORTE_NUM_FINALIZED_TRIGGER, ORTE_NUM_FINALIZED_TRIGGER,
ORTE_NUM_TERMINATED_TRIGGER ORTE_NUM_TERMINATED_TRIGGER,
ORTE_NUM_ABORTED_TRIGGER
}; };
size_t num_counters = sizeof(keys)/sizeof(keys[0]); size_t num_counters = sizeof(keys)/sizeof(keys[0]);
/*** SUBSCRIPTIONS ***/ /* identify the segment for this job */
/* the subscription object is used to define the values we want if (ORTE_SUCCESS != (rc = orte_schema.get_job_segment_name(&segment, job))) {
* returned to us. we'll enter the precise data
* keys when we are ready to register the subscription - for now,
* do all the basic stuff
*/
OBJ_CONSTRUCT(&sub, orte_gpr_subscription_t);
/* we do not name the subscription - see explanation below. also, we do
* not assign the subscription id here - it is assigned for us when the
* registry "registers" the subscription and is returned in the
* subscription object at that time
*/
/*
* set the action to delete the subscription after the trigger fires. this
* subscription is solely for the purpose of returning stagegate information
* to the resource manager - we don't need it after that happens
*/
sub.action = ORTE_GPR_NOTIFY_DELETE_AFTER_TRIG;
/*
* setup the value object to define the data to be returned to us
*/
OBJ_CONSTRUCT(&value, orte_gpr_value_t);
values = &value;
sub.values = &values;
sub.cnt = 1;
/* set the address mode to identify a specific container (in this case,
* the ORTE_JOB_GLOBALS container) and any keys within it
*/
value.addr_mode = ORTE_GPR_TOKENS_XAND | ORTE_GPR_KEYS_OR;
if (ORTE_SUCCESS != (rc = orte_schema.get_job_segment_name(&(value.segment), job))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&value);
sub.values = NULL;
OBJ_DESTRUCT(&sub);
return rc; return rc;
} }
/* define the tokens for the container */
value.tokens = (char**)malloc(sizeof(char*));
if (NULL == value.tokens) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
OBJ_DESTRUCT(&value);
sub.values = NULL;
OBJ_DESTRUCT(&sub);
return ORTE_ERR_OUT_OF_RESOURCE;
}
value.tokens[0] = strdup(ORTE_JOB_GLOBALS); /* the counters are in the job's globals container */
value.num_tokens = 1;
/* the keys describing the data to be returned will be defined later
* for now, we simply allocate the space
*/
value.keyvals = (orte_gpr_keyval_t**)malloc(sizeof(orte_gpr_keyval_t*));
if (NULL == value.keyvals) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
OBJ_DESTRUCT(&value);
sub.values = NULL;
OBJ_DESTRUCT(&sub);
return ORTE_ERR_OUT_OF_RESOURCE;
}
value.keyvals[0] = OBJ_NEW(orte_gpr_keyval_t);
if (NULL == value.keyvals[0]) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
OBJ_DESTRUCT(&value);
sub.values = NULL;
OBJ_DESTRUCT(&sub);
return ORTE_ERR_OUT_OF_RESOURCE;
}
value.cnt = 1;
/* define the callback and associated data tag */
sub.cbfunc = cbfunc;
sub.user_tag = cbdata;
/*** TRIGGERS ***/ /* setup the tokens */
/* setup the trigger information - initialize the common elements */ tokens[0]=ORTE_JOB_GLOBALS;
OBJ_CONSTRUCT(&trig, orte_gpr_trigger_t); tokens[1]=NULL;
/* since the named triggers have already been defined, we don't need
* to replicate that here! all we need to do is refer to the
* proper trigger name - we'll do that below
*/
trig.action = ORTE_GPR_TRIG_ALL_CMP;
/* do the trigger subscriptions */
for (i=0; i < num_counters; i++) { for (i=0; i < num_counters; i++) {
/* insert the subscription key identifying the data to /* attach ourselves to the appropriate standard trigger */
* be returned from this trigger if (ORTE_SUCCESS !=
*/ (rc = orte_schema.get_std_trigger_name(&trig_name, trig_names[i], job))) {
value.keyvals[0]->key = strdup(keys[i]);
if (NULL == value.keyvals[0]->key) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
rc = ORTE_ERR_OUT_OF_RESOURCE;
goto CLEANUP;
}
/* get the standard trigger name to which we are "attaching" */
if (ORTE_SUCCESS != (rc = orte_schema.get_std_trigger_name(&(trig.name),
trig_names[i], job))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
goto CLEANUP; free(segment);
}
subs = &sub;
trigs = &trig;
rc = orte_gpr.subscribe(
1, &subs,
1, &trigs);
if(ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
goto CLEANUP;
}
free(value.keyvals[0]->key);
value.keyvals[0]->key = NULL;
free(trig.name);
trig.name = NULL;
}
/* Now do the abort trigger.
* setup the subscription to return the number aborted\
*/
value.keyvals[0]->key = strdup(ORTE_PROC_NUM_ABORTED);
if (NULL == value.keyvals[0]->key) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
rc = ORTE_ERR_OUT_OF_RESOURCE;
goto CLEANUP;
}
/* set the trigger action */
trig.action = ORTE_GPR_TRIG_ALL_AT;
/* get the standard "abort" trigger name */
if (ORTE_SUCCESS != (rc = orte_schema.get_std_trigger_name(&(trig.name),
ORTE_NUM_ABORTED_TRIGGER, job))) {
ORTE_ERROR_LOG(rc);
goto CLEANUP;
}
subs = &sub;
trigs = &trig;
rc = orte_gpr.subscribe(
1, &subs,
1, &trigs);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
}
CLEANUP:
OBJ_DESTRUCT(&trig);
OBJ_DESTRUCT(&value);
sub.values = NULL;
OBJ_DESTRUCT(&sub);
return rc; return rc;
}
if (ORTE_SUCCESS != (rc = orte_gpr.subscribe_1(&id, trig_name, NULL,
ORTE_GPR_NOTIFY_DELETE_AFTER_TRIG,
ORTE_GPR_TOKENS_OR | ORTE_GPR_KEYS_OR,
segment, tokens, keys[i],
cbfunc, cbdata))) {
ORTE_ERROR_LOG(rc);
free(segment);
free(trig_name);
return rc;
}
free(trig_name);
}
free(segment);
return ORTE_SUCCESS;
} }

Просмотреть файл

@ -169,7 +169,7 @@ typedef int (*orte_rmgr_base_module_proc_stage_gate_init_fn_t)(orte_jobid_t job)
* usually, broadcasting a message to all processes in the job that allows them * usually, broadcasting a message to all processes in the job that allows them
* to proceed. * to proceed.
*/ */
typedef void (*orte_rmgr_base_module_proc_stage_gate_mgr_fn_t)(orte_gpr_notify_data_t *data, void *user_tag); typedef int (*orte_rmgr_base_module_proc_stage_gate_mgr_fn_t)(orte_gpr_notify_message_t *msg);
/** /**
* Cleanup resources held by rmgr. * Cleanup resources held by rmgr.

Просмотреть файл

@ -338,7 +338,8 @@ typedef int (*orte_rml_module_xcast_fn_t)(
orte_process_name_t* peers, orte_process_name_t* peers,
size_t num_peers, size_t num_peers,
orte_buffer_t* buffer, orte_buffer_t* buffer,
orte_rml_buffer_callback_fn_t cbfunc); orte_gpr_trigger_cb_fn_t cbfunc,
void *user_tag);
/* /*
* Initialization/Cleanup * Initialization/Cleanup