1
1

First phase of the scalable RTE changes:

1. Modify the registry to eliminate redundant data copying for startup messages.

2. Revise the subscription/trigger system to avoid redundant storage of triggers and subscriptions. This dramatically reduces the search time when a registry action occurs - to illustrate the point, there are now only a handful of triggers on the system for each job. Before, there were a handful of triggers for each PROCESS in the job, all of which had to be checked every time something happened on the registry. This is much, much faster now.

3. Update all subscriptions to the new format. There are now "named" subscriptions - this allows you to "name" a subscription that all the processes will be using. The first one to hit the registry actually defines the subscription. From then on, any subsequent "subscribes" to the same name just cause that process to "attach" to the existing subscription. This keeps the number of subscriptions being tracked by the registry to a minimum, while ensuring that each process still gets notified.

4. Do the same for triggers.

Also fixed a duplicate subscription problem that caused each recipient to receive the data from a trigger/subscription multiple times - once for every process in the job, instead of just once. Sorry about that... :-( ...but it's all better now!

Uncovered a situation where the modex data seems to be getting entered on the registry a second time - the latter time coming after the compound command has been "fired", thereby causing all the subscriptions to fire. Asked Tim and Jeff to look into this.

Second phase of the changes will involve modifying the xcast system so that the same message gets sent to all processes. This will further reduce the message traffic, and - once we have a true "broadcast" version of xcast - really speed things up and improve scalability.

This commit was SVN r6542.
Этот коммит содержится в:
Ralph Castain 2005-07-18 18:49:00 +00:00
родитель 75b0fa3c87
Коммит 19d58ee17e
34 изменённых файлов: 1585 добавлений и 1126 удалений

Просмотреть файл

@ -277,7 +277,7 @@ void ompi_attr_create_predefined_callback(
orte_gpr_notify_data_t *data,
void *cbdata)
{
size_t i, j;
size_t i, j, k;
orte_gpr_keyval_t **keyval;
orte_gpr_value_t **value;
orte_jobid_t job;
@ -320,17 +320,21 @@ void ompi_attr_create_predefined_callback(
if (0 == data->cnt) { /* no data returned */
universe_size = ompi_comm_size(MPI_COMM_WORLD);
} else {
value = data->values;
for (i=0; i < data->cnt; i++) {
if (0 < value[i]->cnt) { /* make sure some data was returned here */
keyval = value[i]->keyvals;
for (j=0; j < value[i]->cnt; j++) {
/* make sure we don't get confused - all slot counts
* are in size_t fields
*/
if (ORTE_SIZE == keyval[j]->type) {
/* Process slot count */
universe_size += keyval[j]->value.size;
value = (orte_gpr_value_t**)(data->values)->addr;
for (i=0, k=0; k < data->cnt &&
i < (data->values)->size; i++) {
if (NULL != value[i]) {
k++;
if (0 < value[i]->cnt) { /* make sure some data was returned here */
keyval = value[i]->keyvals;
for (j=0; j < value[i]->cnt; j++) {
/* make sure we don't get confused - all slot counts
* are in size_t fields
*/
if (ORTE_SIZE == keyval[j]->type) {
/* Process slot count */
universe_size += keyval[j]->value.size;
}
}
}
}

Просмотреть файл

@ -78,6 +78,7 @@ enum {
* OMPI-specific names for triggers and subscriptions used across processes
*/
#define OMPI_ATTRIBUTE_SUBSCRIPTION "ompi-attribute-sub"
#define OMPI_PROC_SUBSCRIPTION "ompi-proc-sub"
#define OMPI_OOB_SUBSCRIPTION "ompi-oob-sub"
#define OMPI_MODEX_SUBSCRIPTION "ompi-modex-sub"

Просмотреть файл

@ -201,7 +201,8 @@ static void mca_base_modex_registry_callback(
orte_gpr_notify_data_t* data,
void* cbdata)
{
size_t i, j;
size_t i, j, k;
orte_gpr_value_t **values, *value;
orte_gpr_keyval_t **keyval;
ompi_proc_t *proc;
ompi_proc_t **new_procs = NULL;
@ -229,119 +230,123 @@ orte_gpr_base_dump_notify_data(data,0);
}
/* process the callback */
for (i=0; i < data->cnt; i++) {
orte_gpr_value_t *value = data->values[i];
if (0 < value->cnt) { /* needs to be at least one keyval */
/*
* Token for the value should be the process name - look it up
*/
token = value->tokens;
if (ORTE_SUCCESS == orte_ns.convert_string_to_process_name(&proc_name, token[0])) {
proc = ompi_proc_find_and_add(proc_name, &isnew);
if(NULL == proc)
continue;
if(isnew) {
new_procs[new_proc_count] = proc;
new_proc_count++;
}
values = (orte_gpr_value_t**)(data->values)->addr;
for (i=0, k=0; k < data->cnt &&
i < (data->values)->size; i++) {
if (NULL != values[i]) {
k++;
value = values[i];
if (0 < value->cnt) { /* needs to be at least one keyval */
/*
* Lookup the modex data structure.
* Token for the value should be the process name - look it up
*/
token = value->tokens;
if (ORTE_SUCCESS == orte_ns.convert_string_to_process_name(&proc_name, token[0])) {
proc = ompi_proc_find_and_add(proc_name, &isnew);
if(NULL == proc)
continue;
OPAL_THREAD_LOCK(&proc->proc_lock);
if(NULL == (modex = (mca_base_modex_t*)proc->proc_modex)) {
modex = OBJ_NEW(mca_base_modex_t);
if(NULL == modex) {
opal_output(0, "mca_base_modex_registry_callback: unable to allocate mca_base_modex_t\n");
OPAL_THREAD_UNLOCK(&proc->proc_lock);
return;
}
proc->proc_modex = &modex->super;
}
/*
* Extract the component name and version from the keyval object's key
* Could be multiple keyvals returned since there is one for each
* component type/name/version - process them all
*/
keyval = value->keyvals;
for (j=0; j < value->cnt; j++) {
orte_buffer_t buffer;
char *ptr;
void* bytes = NULL;
size_t cnt;
size_t num_bytes;
if(strcmp(keyval[j]->key,"modex") != 0)
continue;
OBJ_CONSTRUCT(&buffer, orte_buffer_t);
if (ORTE_SUCCESS != (rc = orte_dps.load(&buffer,
keyval[j]->value.byteobject.bytes,
keyval[j]->value.byteobject.size))) {
ORTE_ERROR_LOG(rc);
continue;
}
cnt = 1;
if (ORTE_SUCCESS != (rc = orte_dps.unpack(&buffer, &ptr, &cnt, ORTE_STRING))) {
ORTE_ERROR_LOG(rc);
continue;
}
strcpy(component.mca_type_name,ptr);
free(ptr);
cnt = 1;
if (ORTE_SUCCESS != (rc = orte_dps.unpack(&buffer, &ptr, &cnt, ORTE_STRING))) {
ORTE_ERROR_LOG(rc);
continue;
}
strcpy(component.mca_component_name,ptr);
free(ptr);
cnt = 1;
if (ORTE_SUCCESS != (rc = orte_dps.unpack(&buffer,
&component.mca_component_major_version, &cnt, ORTE_INT32))) {
ORTE_ERROR_LOG(rc);
continue;
}
cnt = 1;
if (ORTE_SUCCESS != (rc = orte_dps.unpack(&buffer,
&component.mca_component_minor_version, &cnt, ORTE_INT32))) {
ORTE_ERROR_LOG(rc);
continue;
}
cnt = 1;
if (ORTE_SUCCESS != (rc = orte_dps.unpack(&buffer,
&num_bytes, &cnt, ORTE_SIZE))) {
ORTE_ERROR_LOG(rc);
continue;
}
if (num_bytes != 0) {
if(NULL == (bytes = malloc(num_bytes))) {
opal_output(0, "Unable to allocate memory (length %d bytes).\n", num_bytes );
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
continue;
}
}
if (ORTE_SUCCESS != (rc = orte_dps.unpack(&buffer, bytes, &num_bytes, ORTE_BYTE))) {
ORTE_ERROR_LOG(rc);
continue;
if(isnew) {
new_procs[new_proc_count] = proc;
new_proc_count++;
}
/*
* Lookup the corresponding modex structure
* Lookup the modex data structure.
*/
if(NULL == (modex_module = mca_base_modex_create_module(modex, &component))) {
opal_output(0, "mca_base_modex_registry_callback: mca_base_modex_create_module failed\n");
OBJ_RELEASE(data);
OPAL_THREAD_UNLOCK(&proc->proc_lock);
return;
OPAL_THREAD_LOCK(&proc->proc_lock);
if(NULL == (modex = (mca_base_modex_t*)proc->proc_modex)) {
modex = OBJ_NEW(mca_base_modex_t);
if(NULL == modex) {
opal_output(0, "mca_base_modex_registry_callback: unable to allocate mca_base_modex_t\n");
OPAL_THREAD_UNLOCK(&proc->proc_lock);
return;
}
proc->proc_modex = &modex->super;
}
modex_module->module_data = bytes;
modex_module->module_data_size = num_bytes;
modex_module->module_data_avail = true;
/*
* Extract the component name and version from the keyval object's key
* Could be multiple keyvals returned since there is one for each
* component type/name/version - process them all
*/
keyval = value->keyvals;
for (j=0; j < value->cnt; j++) {
orte_buffer_t buffer;
char *ptr;
void* bytes = NULL;
size_t cnt;
size_t num_bytes;
if(strcmp(keyval[j]->key,"modex") != 0)
continue;
OBJ_CONSTRUCT(&buffer, orte_buffer_t);
if (ORTE_SUCCESS != (rc = orte_dps.load(&buffer,
keyval[j]->value.byteobject.bytes,
keyval[j]->value.byteobject.size))) {
ORTE_ERROR_LOG(rc);
continue;
}
cnt = 1;
if (ORTE_SUCCESS != (rc = orte_dps.unpack(&buffer, &ptr, &cnt, ORTE_STRING))) {
ORTE_ERROR_LOG(rc);
continue;
}
strcpy(component.mca_type_name,ptr);
free(ptr);
cnt = 1;
if (ORTE_SUCCESS != (rc = orte_dps.unpack(&buffer, &ptr, &cnt, ORTE_STRING))) {
ORTE_ERROR_LOG(rc);
continue;
}
strcpy(component.mca_component_name,ptr);
free(ptr);
cnt = 1;
if (ORTE_SUCCESS != (rc = orte_dps.unpack(&buffer,
&component.mca_component_major_version, &cnt, ORTE_INT32))) {
ORTE_ERROR_LOG(rc);
continue;
}
cnt = 1;
if (ORTE_SUCCESS != (rc = orte_dps.unpack(&buffer,
&component.mca_component_minor_version, &cnt, ORTE_INT32))) {
ORTE_ERROR_LOG(rc);
continue;
}
cnt = 1;
if (ORTE_SUCCESS != (rc = orte_dps.unpack(&buffer,
&num_bytes, &cnt, ORTE_SIZE))) {
ORTE_ERROR_LOG(rc);
continue;
}
if (num_bytes != 0) {
if(NULL == (bytes = malloc(num_bytes))) {
opal_output(0, "Unable to allocate memory (length %d bytes).\n", num_bytes );
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
continue;
}
}
if (ORTE_SUCCESS != (rc = orte_dps.unpack(&buffer, bytes, &num_bytes, ORTE_BYTE))) {
ORTE_ERROR_LOG(rc);
continue;
}
/*
* Lookup the corresponding modex structure
*/
if(NULL == (modex_module = mca_base_modex_create_module(modex, &component))) {
opal_output(0, "mca_base_modex_registry_callback: mca_base_modex_create_module failed\n");
OBJ_RELEASE(data);
OPAL_THREAD_UNLOCK(&proc->proc_lock);
return;
}
modex_module->module_data = bytes;
modex_module->module_data_size = num_bytes;
modex_module->module_data_avail = true;
#if 0
opal_output(0, "[%lu,%lu,%lu] mca_base_modex_registry_callback: %s-%s-%d-%d received %d bytes\n",
ORTE_NAME_ARGS(orte_process_info.my_name),
@ -351,12 +356,13 @@ opal_output(0, "[%lu,%lu,%lu] mca_base_modex_registry_callback: %s-%s-%d-%d rece
component.mca_component_minor_version,
num_bytes);
#endif
opal_condition_signal(&modex_module->module_data_cond);
}
OPAL_THREAD_UNLOCK(&proc->proc_lock);
} /* convert string to process name */
} /* if value[i]->cnt > 0 */
opal_condition_signal(&modex_module->module_data_cond);
}
OPAL_THREAD_UNLOCK(&proc->proc_lock);
} /* convert string to process name */
} /* if value[i]->cnt > 0 */
}
}
/* pml add procs */

Просмотреть файл

@ -346,7 +346,7 @@ static int setup_registry_callback(void)
process-independent message and broadcast to all processes */
if (ORTE_SUCCESS !=
(rc = orte_schema.get_std_subscription_name(&(sub.name),
OMPI_ATTRIBUTE_SUBSCRIPTION, jobid))) {
OMPI_PROC_SUBSCRIPTION, jobid))) {
return rc;
}
@ -429,7 +429,7 @@ static int setup_registry_callback(void)
*/
static void callback(orte_gpr_notify_data_t *data, void *cbdata)
{
size_t i, j;
size_t i, j, k;
char *str;
bool found_name;
orte_ns_cmp_bitmask_t mask;
@ -448,36 +448,40 @@ static void callback(orte_gpr_notify_data_t *data, void *cbdata)
/* loop over the data returned in the subscription */
mask = ORTE_NS_CMP_CELLID | ORTE_NS_CMP_JOBID | ORTE_NS_CMP_VPID;
value = data->values;
for (i = 0; i < data->cnt; ++i) {
str = NULL;
found_name = false;
keyval = value[i]->keyvals;
/* find the 2 keys that we're looking for */
for (j = 0; j < value[i]->cnt; ++j) {
if (strcmp(keyval[j]->key, ORTE_PROC_NAME_KEY) == 0) {
orte_ns.get_proc_name_string(&str, &keyval[j]->value.proc);
name = keyval[j]->value.proc;
found_name = true;
} else if (strcmp(keyval[j]->key, ORTE_NODE_NAME_KEY) == 0) {
if (NULL != str) {
free(str);
value = (orte_gpr_value_t**)(data->values)->addr;
for (i = 0, k=0; k < data->cnt &&
i < (data->values)->size; ++i) {
if (NULL != value[i]) {
k++;
str = NULL;
found_name = false;
keyval = value[i]->keyvals;
/* find the 2 keys that we're looking for */
for (j = 0; j < value[i]->cnt; ++j) {
if (strcmp(keyval[j]->key, ORTE_PROC_NAME_KEY) == 0) {
orte_ns.get_proc_name_string(&str, &keyval[j]->value.proc);
name = keyval[j]->value.proc;
found_name = true;
} else if (strcmp(keyval[j]->key, ORTE_NODE_NAME_KEY) == 0) {
if (NULL != str) {
free(str);
}
str = strdup(keyval[j]->value.strptr);
}
str = strdup(keyval[j]->value.strptr);
}
}
/* if we found both keys and the proc is on my local host,
find it in the master proc list and set the "local" flag */
if (NULL != str && found_name &&
0 == strcmp(str, orte_system_info.nodename)) {
for (proc = (ompi_proc_t*)opal_list_get_first(&ompi_proc_list);
proc != (ompi_proc_t*)opal_list_get_end(&ompi_proc_list);
proc = (ompi_proc_t*)opal_list_get_next(proc)) {
if (0 == orte_ns.compare(mask, &name,
&proc->proc_name)) {
proc->proc_flags |= OMPI_PROC_FLAG_LOCAL;
/* if we found both keys and the proc is on my local host,
find it in the master proc list and set the "local" flag */
if (NULL != str && found_name &&
0 == strcmp(str, orte_system_info.nodename)) {
for (proc = (ompi_proc_t*)opal_list_get_first(&ompi_proc_list);
proc != (ompi_proc_t*)opal_list_get_end(&ompi_proc_list);
proc = (ompi_proc_t*)opal_list_get_next(proc)) {
if (0 == orte_ns.compare(mask, &name,
&proc->proc_name)) {
proc->proc_flags |= OMPI_PROC_FLAG_LOCAL;
}
}
}
}

Просмотреть файл

@ -353,6 +353,20 @@ bool orte_pointer_array_test_and_set_item (orte_pointer_array_t *table,
}
/**
 * Grow a pointer array until it holds at least new_size entries.
 *
 * The array is only ever enlarged: if it already has new_size or more
 * entries, nothing is changed.
 *
 * @param array     Pointer array to resize (IN)
 * @param new_size  Minimum number of entries the array must provide (IN)
 *
 * @retval ORTE_SUCCESS  The array now has at least new_size entries
 * @retval ORTE_ERROR    grow_table() reported failure before the
 *                       requested size was reached
 */
int orte_pointer_array_set_size(orte_pointer_array_t *array, size_t new_size)
{
    int rc = ORTE_SUCCESS;

    /* The lock belongs to the array passed in.  The previous code named a
     * non-existent "table" variable here, which only built when the lock
     * macros expanded to nothing. */
    OPAL_THREAD_LOCK(&(array->lock));

    /* grow_table() enlarges the array by some increment per call, so keep
     * calling it until the requested size is reached or it fails */
    while (new_size > orte_pointer_array_get_size(array)) {
        if (!grow_table(array)) {
            rc = ORTE_ERROR;
            break;
        }
    }

    /* single exit point so the lock is always released */
    OPAL_THREAD_UNLOCK(&(array->lock));
    return rc;
}
static bool grow_table(orte_pointer_array_t *table)
{
size_t new_size, i;

Просмотреть файл

@ -141,6 +141,19 @@ static inline size_t orte_pointer_array_get_size(orte_pointer_array_t *array)
}
/**
 * Ensure the pointer array holds at least the given number of entries
 *
 * @param array Pointer to array (IN)
 *
 * @param size Desired minimum size of the array (IN)
 *
 * @retval ORTE_SUCCESS The array has at least the requested size
 * @retval ORTE_ERROR The array could not be grown to the requested size
 *
 * Simple function to set the size of the array in order to
 * hide the member field from external users. The array is grown
 * until it reaches the requested size; an array that is already
 * that large or larger is left unchanged (it is never shrunk).
 */
OMPI_DECLSPEC int orte_pointer_array_set_size(orte_pointer_array_t *array, size_t size);
/**
* Clear the pointer array
*
@ -153,11 +166,9 @@ static inline size_t orte_pointer_array_get_size(orte_pointer_array_t *array)
*/
static inline void orte_pointer_array_clear(orte_pointer_array_t *array)
{
size_t i;
OPAL_THREAD_LOCK(&(array->lock));
for (i=0; i < array->size; i++) {
array->addr[i] = NULL;
}
/* set the array elements to NULL */
memset(array->addr, 0, array->size * sizeof(void*));
array->lowest_free = 0;
array->number_free = array->size;
OPAL_THREAD_UNLOCK(&(array->lock));

Просмотреть файл

@ -17,7 +17,6 @@
#ifndef ORTE_TYPES_H
#define ORTE_TYPES_H
#include "opal/class/opal_object.h"
/**
* Supported datatypes for messaging and storage operations.
@ -65,26 +64,28 @@ typedef uint8_t orte_data_type_t ;
#define ORTE_NAME (orte_data_type_t) 26 /**< an ompi_process_name_t */
#define ORTE_VPID (orte_data_type_t) 27 /**< a vpid */
#define ORTE_JOBID (orte_data_type_t) 28 /**< a jobid */
#define ORTE_CELLID (orte_data_type_t) 29 /**< a cellid */
#define ORTE_JOBGRP (orte_data_type_t) 29 /**< a job group */
#define ORTE_CELLID (orte_data_type_t) 30 /**< a cellid */
/* SOH types */
#define ORTE_NODE_STATE (orte_data_type_t) 30 /**< node status flag */
#define ORTE_PROC_STATE (orte_data_type_t) 31 /**< process/resource status */
#define ORTE_EXIT_CODE (orte_data_type_t) 32 /**< process exit code */
#define ORTE_NODE_STATE (orte_data_type_t) 31 /**< node status flag */
#define ORTE_PROC_STATE (orte_data_type_t) 32 /**< process/resource status */
#define ORTE_EXIT_CODE (orte_data_type_t) 33 /**< process exit code */
/* GPR types */
#define ORTE_KEYVAL (orte_data_type_t) 33 /**< registry key-value pair */
#define ORTE_GPR_NOTIFY_ACTION (orte_data_type_t) 34 /**< registry notify action */
#define ORTE_GPR_TRIGGER_ACTION (orte_data_type_t) 35 /**< registry trigger action */
#define ORTE_GPR_CMD (orte_data_type_t) 36 /**< registry command */
#define ORTE_GPR_SUBSCRIPTION_ID (orte_data_type_t) 37 /**< registry notify id tag */
#define ORTE_GPR_TRIGGER_ID (orte_data_type_t) 38 /**< registry notify id tag */
#define ORTE_GPR_VALUE (orte_data_type_t) 39 /**< registry return value */
#define ORTE_GPR_ADDR_MODE (orte_data_type_t) 40 /**< Addressing mode for registry cmds */
#define ORTE_GPR_SUBSCRIPTION (orte_data_type_t) 41 /**< describes data returned by subscription */
#define ORTE_GPR_TRIGGER (orte_data_type_t) 42 /**< describes trigger conditions */
#define ORTE_GPR_NOTIFY_DATA (orte_data_type_t) 43 /**< data returned from a subscription */
#define ORTE_KEYVAL (orte_data_type_t) 34 /**< registry key-value pair */
#define ORTE_GPR_NOTIFY_ACTION (orte_data_type_t) 35 /**< registry notify action */
#define ORTE_GPR_TRIGGER_ACTION (orte_data_type_t) 36 /**< registry trigger action */
#define ORTE_GPR_CMD (orte_data_type_t) 37 /**< registry command */
#define ORTE_GPR_SUBSCRIPTION_ID (orte_data_type_t) 38 /**< registry notify id tag */
#define ORTE_GPR_TRIGGER_ID (orte_data_type_t) 39 /**< registry notify id tag */
#define ORTE_GPR_VALUE (orte_data_type_t) 40 /**< registry return value */
#define ORTE_GPR_ADDR_MODE (orte_data_type_t) 41 /**< Addressing mode for registry cmds */
#define ORTE_GPR_SUBSCRIPTION (orte_data_type_t) 42 /**< describes data returned by subscription */
#define ORTE_GPR_TRIGGER (orte_data_type_t) 43 /**< describes trigger conditions */
#define ORTE_GPR_NOTIFY_DATA (orte_data_type_t) 44 /**< data returned from a subscription */
#define ORTE_GPR_NOTIFY_MSG (orte_data_type_t) 45 /**< notify message containing notify_data objects */
/* Resource Manager types */
#define ORTE_APP_CONTEXT (orte_data_type_t) 44 /**< argv and enviro arrays */
#define ORTE_APP_CONTEXT_MAP (orte_data_type_t) 45 /**< application context mapping array */
#define ORTE_APP_CONTEXT (orte_data_type_t) 46 /**< argv and enviro arrays */
#define ORTE_APP_CONTEXT_MAP (orte_data_type_t) 47 /**< application context mapping array */
/* define the starting point for dynamically assigning data types */
#define ORTE_DPS_ID_DYNAMIC 50

Просмотреть файл

@ -33,7 +33,7 @@ libmca_gpr_base_la_SOURCES = \
gpr_base_close.c \
gpr_base_select.c \
gpr_base_xfer_payload.c \
data_type_support/gpr_data_type_packing_fns.c \
data_type_support/gpr_data_type_packing_fns.c \
data_type_support/gpr_data_type_unpacking_fns.c \
unpack_api_response/gpr_base_unpack_cleanup.c \
unpack_api_response/gpr_base_unpack_del_index.c \

Просмотреть файл

@ -216,6 +216,9 @@ int orte_gpr_base_pack_trigger(orte_buffer_t *buffer, void *src,
int orte_gpr_base_pack_notify_data(orte_buffer_t *buffer, void *src,
size_t num_vals, orte_data_type_t type);
int orte_gpr_base_pack_notify_msg(orte_buffer_t *buffer, void *src,
size_t num_vals, orte_data_type_t type);
/* GPR DATA TYPE UNPACKING FUNCTIONS */
int orte_gpr_base_unpack_cmd(orte_buffer_t *buffer, void *dest,
size_t *num_vals, orte_data_type_t type);
@ -250,6 +253,9 @@ int orte_gpr_base_unpack_trigger(orte_buffer_t *buffer, void *dest,
int orte_gpr_base_unpack_notify_data(orte_buffer_t *buffer, void *dest,
size_t *num_vals, orte_data_type_t type);
int orte_gpr_base_unpack_notify_msg(orte_buffer_t *buffer, void *dest,
size_t *num_vals, orte_data_type_t type);
/* general utilities */
OMPI_DECLSPEC int orte_gpr_base_xfer_payload(orte_gpr_value_union_t *dest,
orte_gpr_value_union_t *src,
@ -259,6 +265,7 @@ OMPI_DECLSPEC int orte_gpr_base_xfer_payload(orte_gpr_value_union_t *dest,
* globals that might be needed inside the gpr
*/
OMPI_DECLSPEC extern int orte_gpr_base_output;
OMPI_DECLSPEC extern size_t orte_gpr_array_max_size, orte_gpr_array_block_size;
OMPI_DECLSPEC extern bool orte_gpr_base_selected;
OMPI_DECLSPEC extern opal_list_t orte_gpr_base_components_available;
OMPI_DECLSPEC extern mca_gpr_base_component_t orte_gpr_base_selected_component;

Просмотреть файл

@ -337,13 +337,21 @@ int orte_gpr_base_pack_notify_data(orte_buffer_t *buffer, void *src,
{
int rc;
orte_gpr_notify_data_t **data;
size_t i;
orte_gpr_value_t **values;
size_t i, j, k;
/* array of pointers to notify data objects - need to pack the objects */
data = (orte_gpr_notify_data_t**) src;
for (i=0; i<num_vals; i++) {
/* pack the subscription name */
if (ORTE_SUCCESS != (rc = orte_dps_pack_buffer(buffer,
(void*)(&(data[i]->name)), 1, ORTE_STRING))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* pack the subscription number */
if (ORTE_SUCCESS != (rc = orte_dps_pack_buffer(buffer,
(void*)(&(data[i]->id)), 1, ORTE_GPR_SUBSCRIPTION_ID))) {
@ -351,6 +359,13 @@ int orte_gpr_base_pack_notify_data(orte_buffer_t *buffer, void *src,
return rc;
}
/* pack the remove flag */
if (ORTE_SUCCESS != (rc = orte_dps_pack_buffer(buffer,
(void*)(&(data[i]->remove)), 1, ORTE_BOOL))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* pack the number of values so we can read it for unpacking */
if (ORTE_SUCCESS != (rc = orte_dps_pack_buffer(buffer,
(void*)(&(data[i]->cnt)), 1, DPS_TYPE_SIZE_T))) {
@ -360,10 +375,88 @@ int orte_gpr_base_pack_notify_data(orte_buffer_t *buffer, void *src,
/* if there are values, pack the values */
if (0 < data[i]->cnt) {
if (ORTE_SUCCESS != (rc = orte_dps_pack_buffer(buffer,
(void*)((data[i]->values)), data[i]->cnt, ORTE_GPR_VALUE))) {
ORTE_ERROR_LOG(rc);
return rc;
values = (orte_gpr_value_t**)(data[i]->values)->addr;
for (j=0, k=0; k < data[i]->cnt &&
j < (data[i]->values)->size; j++) {
if (NULL != values[j]) {
k++;
if (ORTE_SUCCESS != (rc = orte_dps_pack_buffer(buffer, &values[j],
1, ORTE_GPR_VALUE))) {
ORTE_ERROR_LOG(rc);
return rc;
}
}
}
}
}
return ORTE_SUCCESS;
}
/*
* NOTIFY MSG
*/
int orte_gpr_base_pack_notify_msg(orte_buffer_t *buffer, void *src,
size_t num_vals, orte_data_type_t type)
{
int rc;
orte_gpr_notify_message_t **msg;
orte_gpr_notify_data_t **data;
size_t i, j, k;
/* array of messages */
msg = (orte_gpr_notify_message_t**) src;
for (i=0; i<num_vals; i++) {
/* pack the trigger name */
if (ORTE_SUCCESS != (rc = orte_dps_pack_buffer(buffer,
(void*)(&(msg[i]->name)), 1, ORTE_STRING))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* pack the trigger number */
if (ORTE_SUCCESS != (rc = orte_dps_pack_buffer(buffer,
(void*)(&(msg[i]->id)), 1, ORTE_GPR_TRIGGER_ID))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* pack the remove flag */
if (ORTE_SUCCESS != (rc = orte_dps_pack_buffer(buffer,
(void*)(&(msg[i]->remove)), 1, ORTE_BOOL))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* pack the number of datagrams so we can read it for unpacking */
if (ORTE_SUCCESS != (rc = orte_dps_pack_buffer(buffer,
(void*)(&(msg[i]->cnt)), 1, DPS_TYPE_SIZE_T))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* if there are datagrams, pack them */
if (0 < msg[i]->cnt) {
/* array of pointers to notify data objects - need to pack the objects.
* to do this, we assume that the array objects are contiguous
* in the pointer array. the pointer array itself does not
* guarantee this property - we are exploiting, however, our knowledge
* of how these messages are constructed.
*/
data = (orte_gpr_notify_data_t**)(msg[i]->data)->addr;
for (j=0, k=0; k < msg[i]->cnt &&
j < (msg[i]->data)->size; j++) {
if (NULL != data[j]) {
k++;
if (ORTE_SUCCESS != (rc = orte_dps_pack_buffer(buffer, &(data[j]),
1, ORTE_GPR_NOTIFY_DATA))) {
ORTE_ERROR_LOG(rc);
return rc;
}
}
}
}
}

Просмотреть файл

@ -393,7 +393,8 @@ int orte_gpr_base_unpack_notify_data(orte_buffer_t *buffer, void *dest,
{
int rc;
orte_gpr_notify_data_t **data;
size_t i, max_n=1;
orte_gpr_value_t **values;
size_t i, j, max_n=1;
/* unpack into array of notify_data objects */
data = (orte_gpr_notify_data_t**) dest;
@ -406,6 +407,13 @@ int orte_gpr_base_unpack_notify_data(orte_buffer_t *buffer, void *dest,
return ORTE_ERR_OUT_OF_RESOURCE;
}
/* unpack the subscription name */
if (ORTE_SUCCESS != (rc = orte_dps_unpack_buffer(buffer, &(data[i]->name),
&max_n, ORTE_STRING))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* unpack the subscription number */
if (ORTE_SUCCESS != (rc = orte_dps_unpack_buffer(buffer, &(data[i]->id),
&max_n, ORTE_GPR_SUBSCRIPTION_ID))) {
@ -413,27 +421,107 @@ int orte_gpr_base_unpack_notify_data(orte_buffer_t *buffer, void *dest,
return rc;
}
/* unpack the remove flag */
if (ORTE_SUCCESS != (rc = orte_dps_unpack_buffer(buffer, &(data[i]->remove),
&max_n, ORTE_BOOL))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* get the number of values */
if (ORTE_SUCCESS != (rc = orte_dps_unpack_buffer(buffer, &(data[i]->cnt),
&max_n, DPS_TYPE_SIZE_T))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* if there are values, allocate the required space for the value pointers */
/* if there are values, unpack them to the value array */
if (0 < data[i]->cnt) {
data[i]->values = (orte_gpr_value_t**)malloc(data[i]->cnt * sizeof(orte_gpr_value_t*));
if (NULL == data[i]->values) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
/* and unpack them */
if (ORTE_SUCCESS != (rc = orte_dps_unpack_buffer(buffer, data[i]->values,
&(data[i]->cnt), ORTE_GPR_VALUE))) {
if (ORTE_SUCCESS != (rc = orte_pointer_array_set_size(data[i]->values, data[i]->cnt))) {
ORTE_ERROR_LOG(rc);
return rc;
}
values = (orte_gpr_value_t**)(data[i]->values)->addr;
for (j=0; j < data[i]->cnt; j++) {
max_n = 1;
if (ORTE_SUCCESS != (rc = orte_dps_unpack_buffer(buffer, &(values[j]),
&max_n, ORTE_GPR_VALUE))) {
ORTE_ERROR_LOG(rc);
return rc;
}
}
}
}
return ORTE_SUCCESS;
}
/*
* NOTIFY MSG
*/
int orte_gpr_base_unpack_notify_msg(orte_buffer_t *buffer, void *dest,
size_t *num_vals, orte_data_type_t type)
{
int rc;
orte_gpr_notify_message_t **msg;
orte_gpr_notify_data_t **data;
size_t i, j, max_n=1;
/* unpack into array of notify message objects */
msg = (orte_gpr_notify_message_t**) dest;
for (i=0; i < *num_vals; i++) {
/* create the data object */
msg[i] = OBJ_NEW(orte_gpr_notify_message_t);
if (NULL == msg[i]) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
/* unpack the trigger name */
if (ORTE_SUCCESS != (rc = orte_dps_unpack_buffer(buffer, &(msg[i]->name),
&max_n, ORTE_STRING))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* unpack the trigger number */
if (ORTE_SUCCESS != (rc = orte_dps_unpack_buffer(buffer, &(msg[i]->id),
&max_n, ORTE_GPR_TRIGGER_ID))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* unpack the remove flag */
if (ORTE_SUCCESS != (rc = orte_dps_unpack_buffer(buffer, &(msg[i]->remove),
&max_n, ORTE_BOOL))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* get the number of datagrams */
if (ORTE_SUCCESS != (rc = orte_dps_unpack_buffer(buffer, &(msg[i]->cnt),
&max_n, DPS_TYPE_SIZE_T))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* if there are datagrams, unpack them to the data array */
if (0 < msg[i]->cnt) {
if (ORTE_SUCCESS != (rc = orte_pointer_array_set_size(msg[i]->data, msg[i]->cnt))) {
ORTE_ERROR_LOG(rc);
return rc;
}
data = (orte_gpr_notify_data_t**)(msg[i]->data)->addr;
for (j=0; j < msg[i]->cnt; j++) {
max_n = 1;
if (ORTE_SUCCESS != (rc = orte_dps_unpack_buffer(buffer, &(data[j]),
&max_n, ORTE_GPR_NOTIFY_DATA))) {
ORTE_ERROR_LOG(rc);
return rc;
}
}
}
}

Просмотреть файл

@ -35,6 +35,14 @@
#include "orte/mca/gpr/base/static-components.h"
/* JMS: This is only INT_MAX until bug 1345 is fixed, because this
value is used to set an MCA parameter, which can [currently] only
take an int. */
#define ORTE_GPR_ARRAY_MAX_SIZE INT_MAX
#define ORTE_GPR_ARRAY_BLOCK_SIZE 512
/*
* globals
*/
@ -129,22 +137,34 @@ OBJ_CLASS_INSTANCE(
/* constructor - used to initialize state of registry value instance */
static void orte_gpr_notify_data_construct(orte_gpr_notify_data_t* ptr)
{
ptr->name = NULL;
ptr->id = ORTE_GPR_SUBSCRIPTION_ID_MAX;
ptr->remove = false;
ptr->cnt = 0;
ptr->values = NULL;
orte_pointer_array_init(&(ptr->values), orte_gpr_array_block_size,
orte_gpr_array_max_size,
orte_gpr_array_block_size);
}
/* destructor - used to free any resources held by instance */
static void orte_gpr_notify_data_destructor(orte_gpr_notify_data_t* ptr)
{
size_t i;
size_t i, j;
orte_gpr_value_t **values;
if (0 < ptr->cnt && NULL != ptr->values) {
for (i=0; i < ptr->cnt; i++) {
if (NULL != ptr->values[i])
OBJ_RELEASE(ptr->values[i]);
if (NULL != ptr->name) free(ptr->name);
if (NULL != ptr->values) {
values = (orte_gpr_value_t**)(ptr->values)->addr;
for (i=0, j=0; j < ptr->cnt &&
i < (ptr->values)->size; i++) {
if (NULL != values[i]) {
j++;
OBJ_RELEASE(values[i]);
}
}
free(ptr->values);
OBJ_RELEASE(ptr->values);
}
}
@ -201,6 +221,8 @@ static void orte_gpr_trigger_construct(orte_gpr_trigger_t* trig)
trig->action = 0;
trig->cnt = 0;
trig->values = NULL;
trig->cbfunc = NULL;
trig->user_tag = NULL;
}
/* destructor - used to free any resources held by instance */
@ -229,20 +251,33 @@ OBJ_CLASS_INSTANCE(
/* constructor - used to initialize notify message instance */
static void orte_gpr_notify_message_construct(orte_gpr_notify_message_t* msg)
{
msg->name = NULL;
msg->id = ORTE_GPR_TRIGGER_ID_MAX;
msg->remove = false;
msg->cnt = 0;
msg->data = NULL;
orte_pointer_array_init(&(msg->data), orte_gpr_array_block_size,
orte_gpr_array_max_size,
orte_gpr_array_block_size);
}
/* destructor - used to free any resources held by instance */
static void orte_gpr_notify_message_destructor(orte_gpr_notify_message_t* msg)
{
size_t i;
size_t i, j;
orte_gpr_notify_data_t **data;
if (0 < msg->cnt && NULL != msg->data) {
for (i=0; i < msg->cnt; i++) {
if (NULL != msg->data[i]) OBJ_RELEASE(msg->data[i]);
if (NULL != msg->name) free(msg->name);
if (NULL != msg->data) {
data = (orte_gpr_notify_data_t**)(msg->data)->addr;
for (i=0, j=0; j < msg->cnt &&
i < (msg->data)->size; i++) {
if (NULL != data[i]) {
j++;
OBJ_RELEASE(data[i]);
}
}
free(msg->data);
OBJ_RELEASE(msg->data);
}
}
@ -259,6 +294,7 @@ OBJ_CLASS_INSTANCE(
* Global variables
*/
int orte_gpr_base_output = -1;
size_t orte_gpr_array_max_size, orte_gpr_array_block_size;
orte_gpr_base_module_t orte_gpr;
bool orte_gpr_base_selected = false;
opal_list_t orte_gpr_base_components_available;
@ -272,7 +308,7 @@ opal_mutex_t orte_gpr_mutex;
*/
int orte_gpr_base_open(void)
{
int param, value, rc;
int param, value, rc, id;
orte_data_type_t tmp;
/* Debugging / verbose output */
@ -286,6 +322,16 @@ int orte_gpr_base_open(void)
orte_gpr_base_output = -1;
}
id = mca_base_param_register_int("gpr", "base", "maxsize", NULL,
ORTE_GPR_ARRAY_MAX_SIZE);
mca_base_param_lookup_int(id, &param);
orte_gpr_array_max_size = (size_t)param;
id = mca_base_param_register_int("gpr", "base", "blocksize", NULL,
ORTE_GPR_ARRAY_BLOCK_SIZE);
mca_base_param_lookup_int(id, &param);
orte_gpr_array_block_size = (size_t)param;
/* register the base data types with the DPS */
tmp = ORTE_GPR_CMD;
if (ORTE_SUCCESS != (rc = orte_dps.register_type(orte_gpr_base_pack_cmd,
@ -375,6 +421,14 @@ int orte_gpr_base_open(void)
return rc;
}
tmp = ORTE_GPR_NOTIFY_MSG;
if (ORTE_SUCCESS != (rc = orte_dps.register_type(orte_gpr_base_pack_notify_msg,
orte_gpr_base_unpack_notify_msg,
"ORTE_GPR_NOTIFY_MSG", &tmp))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* Open up all available components */
if (OMPI_SUCCESS !=

Просмотреть файл

@ -40,7 +40,8 @@ int orte_gpr_base_dump_notify_msg(orte_buffer_t *buffer,
orte_gpr_notify_message_t *msg)
{
char *tmp_out;
size_t i;
orte_gpr_notify_data_t **data;
size_t i, j;
asprintf(&tmp_out, "\nDUMP OF NOTIFY MESSAGE STRUCTURE");
orte_gpr_base_dump_load_string(buffer, &tmp_out);
@ -51,16 +52,31 @@ int orte_gpr_base_dump_notify_msg(orte_buffer_t *buffer,
return ORTE_SUCCESS;
}
asprintf(&tmp_out, "%lu Notify data structures in message",
if (NULL == msg->name) {
asprintf(&tmp_out, "\tTrigger name: NULL");
} else {
asprintf(&tmp_out, "\tTrigger name: %s", msg->name);
}
orte_gpr_base_dump_load_string(buffer, &tmp_out);
asprintf(&tmp_out, "\tTrigger id: %d", msg->id);
orte_gpr_base_dump_load_string(buffer, &tmp_out);
asprintf(&tmp_out, "\t%lu Notify data structures in message",
(unsigned long) msg->cnt);
orte_gpr_base_dump_load_string(buffer, &tmp_out);
if (0 < msg->cnt && NULL != msg->data) {
for (i=0; i < msg->cnt; i++) {
asprintf(&tmp_out, "\nDump of notify data structure number %lu",
(unsigned long) i);
orte_gpr_base_dump_load_string(buffer, &tmp_out);
orte_gpr_base_dump_data(buffer, msg->data[i]);
if (0 < msg->cnt) {
data = (orte_gpr_notify_data_t**)(msg->data)->addr;
for (i=0, j=0; j < msg->cnt &&
i < (msg->data)->size; i++) {
if (NULL != data[i]) {
asprintf(&tmp_out, "\nDump of notify data structure number %lu",
(unsigned long) j);
orte_gpr_base_dump_load_string(buffer, &tmp_out);
orte_gpr_base_dump_data(buffer, data[i]);
j++;
}
}
}
@ -80,7 +96,7 @@ int orte_gpr_base_dump_notify_data(orte_buffer_t *buffer,
orte_gpr_base_dump_load_string(buffer, &tmp_out);
return ORTE_SUCCESS;
}
orte_gpr_base_dump_data(buffer, data);
return ORTE_SUCCESS;
}
@ -90,21 +106,25 @@ static void orte_gpr_base_dump_data(orte_buffer_t *buffer,
{
char *tmp_out;
orte_gpr_value_t **values;
size_t i;
size_t i, j;
asprintf(&tmp_out, "%lu values going to subscription num %lu",
if (NULL != data->name) {
asprintf(&tmp_out, "%lu values going to subscription name %s",
(unsigned long) data->cnt, data->name);
} else {
asprintf(&tmp_out, "%lu values going to subscription num %lu",
(unsigned long) data->cnt, (unsigned long) data->id);
}
orte_gpr_base_dump_load_string(buffer, &tmp_out);
if (0 < data->cnt && NULL != data->values) {
values = data->values;
for (i=0; i < data->cnt; i++) {
asprintf(&tmp_out, "\nData for value %lu", (unsigned long) i);
orte_gpr_base_dump_load_string(buffer, &tmp_out);
if (NULL == values[i]) {
asprintf(&tmp_out, "\tError encountered: NULL value pointer");
values = (orte_gpr_value_t**)(data->values)->addr;
if (0 < data->cnt) {
for (i=0, j=0; j < data->cnt &&
i < (data->values)->size; i++) {
if (NULL != values[i]) {
j++;
asprintf(&tmp_out, "\nData for value %lu", (unsigned long) j);
orte_gpr_base_dump_load_string(buffer, &tmp_out);
} else {
orte_gpr_base_dump_value(buffer, values[i]);
}
}
@ -117,8 +137,13 @@ int orte_gpr_base_dump_value(orte_buffer_t *buffer, orte_gpr_value_t *value)
orte_gpr_addr_mode_t addr;
size_t j;
asprintf(&tmp_out, "\tValue from segment %s with %lu keyvals",
value->segment, (unsigned long) value->cnt);
if (NULL == value->segment) {
asprintf(&tmp_out, "\tNULL segment name in value - %lu keyvals",
(unsigned long) value->cnt);
} else {
asprintf(&tmp_out, "\tValue from segment %s with %lu keyvals",
value->segment, (unsigned long) value->cnt);
}
orte_gpr_base_dump_load_string(buffer, &tmp_out);
addr = value->addr_mode;
@ -130,8 +155,13 @@ int orte_gpr_base_dump_value(orte_buffer_t *buffer, orte_gpr_value_t *value)
(unsigned long) value->num_tokens);
orte_gpr_base_dump_load_string(buffer, &tmp_out);
for (j=0; j < value->num_tokens; j++) {
asprintf(&tmp_out, "\tToken %lu: %s", (unsigned long) j,
if (NULL == value->tokens[j]) {
asprintf(&tmp_out, "\tToken %lu: NULL token pointer",
(unsigned long) j);
} else {
asprintf(&tmp_out, "\tToken %lu: %s", (unsigned long) j,
value->tokens[j]);
}
orte_gpr_base_dump_load_string(buffer, &tmp_out);
}
}
@ -186,8 +216,13 @@ int orte_gpr_base_dump_value(orte_buffer_t *buffer, orte_gpr_value_t *value)
}
for (j=0; j < value->cnt; j++) {
asprintf(&tmp_out, "\t\tData for keyval %lu: Key: %s",
(unsigned long) j, (value->keyvals[j])->key);
if (NULL == (value->keyvals[j])->key) {
asprintf(&tmp_out, "\t\tData for keyval %lu: NULL key",
(unsigned long) j);
} else {
asprintf(&tmp_out, "\t\tData for keyval %lu: Key: %s",
(unsigned long) j, (value->keyvals[j])->key);
}
orte_gpr_base_dump_load_string(buffer, &tmp_out);
orte_gpr_base_dump_keyval_value(buffer, value->keyvals[j]);
}

Просмотреть файл

@ -37,6 +37,7 @@
#include "mca/schema/schema.h"
#include "opal/class/opal_object.h"
#include "class/orte_pointer_array.h"
#include "dps/dps_types.h"
#include "mca/ns/ns_types.h"
#include "mca/rmgr/rmgr_types.h"
@ -58,8 +59,7 @@ extern "C" {
#define ORTE_GPR_NOTIFY_ALL (uint8_t)0x0f /**< Notifies subscriber upon any action */
#define ORTE_GPR_NOTIFY_PRE_EXISTING (uint8_t)0x10 /**< Provide list of all pre-existing data */
#define ORTE_GPR_NOTIFY_STARTS_AFTER_TRIG (uint8_t)0x20 /**< Notifies are off when subscription entered - turned on when trigger fires */
#define ORTE_GPR_NOTIFY_NO_DATA_WITH_TRIG (uint8_t)0x40 /**< Do not include subscription data when initial trigger fires */
#define ORTE_GPR_NOTIFY_DELETE_AFTER_TRIG (uint8_t)0x80
#define ORTE_GPR_NOTIFY_DELETE_AFTER_TRIG (uint8_t)0x40 /**< Delete this subscription after associated trigger fires */
#define ORTE_GPR_NOTIFY_ANY (uint8_t)0xff /**< Used to test if any action flags set */
typedef uint8_t orte_gpr_notify_action_t;
@ -70,8 +70,9 @@ typedef size_t orte_gpr_subscription_id_t;
#define ORTE_GPR_SUBSCRIPTION_ID_MAX SIZE_MAX
#define ORTE_GPR_TRIG_ONE_SHOT (uint8_t)0x01 /**< Only trigger once - then delete subscription */
#define ORTE_GPR_TRIG_INCLUDE_DATA (uint8_t)0x02 /**< Include the trigger data in the notification msg */
#define ORTE_GPR_TRIG_INCLUDE_TRIG_CNTRS (uint8_t)0x01 /**< Include the trigger data in the notification msg */
#define ORTE_GPR_TRIG_ONE_SHOT (uint8_t)0x02 /**< Only trigger once - then delete trigger */
#define ORTE_GPR_TRIG_ROUTE_DATA_THRU_ME (uint8_t)0x04 /**< send all associated data to trigger callback fn */
#define ORTE_GPR_TRIG_AT_LEVEL (uint8_t)0x08 /**< Trigger whenever count reaches specified level */
#define ORTE_GPR_TRIG_CMP_LEVELS (uint8_t)0x80 /**< Trigger when all the specified values are equal */
#define ORTE_GPR_TRIG_ALL_AT (uint8_t)0x7f /**< Use all trig defs except include trig data with AT - a typical situation */
@ -135,6 +136,7 @@ typedef union { /* shared storage for the value */
orte_process_name_t proc;
orte_vpid_t vpid;
orte_jobid_t jobid;
/* orte_jobgrp_t jobgrp; */
orte_cellid_t cellid;
orte_node_state_t node_state;
orte_proc_state_t proc_state;
@ -185,9 +187,11 @@ OMPI_DECLSPEC OBJ_CLASS_DECLARATION(orte_gpr_value_t);
*/
typedef struct {
opal_object_t super; /**< Makes this an object */
char *name; /**< Name of the associated subscripton, if provided */
orte_gpr_subscription_id_t id; /**< Number of the associated subscription */
bool remove; /**< Remove this subscription from recipient's tracker */
size_t cnt; /**< Number of value objects returned, one per container */
orte_gpr_value_t **values; /**< Array of value objects returned */
orte_pointer_array_t *values; /**< Array of value objects returned */
} orte_gpr_notify_data_t;
OMPI_DECLSPEC OBJ_CLASS_DECLARATION(orte_gpr_notify_data_t);
@ -196,8 +200,11 @@ OMPI_DECLSPEC OBJ_CLASS_DECLARATION(orte_gpr_notify_data_t);
*/
typedef struct {
opal_object_t super; /**< Make this an object */
size_t cnt; /**< number of data objects */
orte_gpr_notify_data_t **data; /**< Contiguous array of pointers to data objects */
char *name; /**< Name of the associated trigger, if provided */
orte_gpr_trigger_id_t id; /**< trigger id, if message comes from trigger (ORTE_GPR_TRIGGER_ID_MAX otherwise) */
bool remove; /**< Remove this trigger from recipient's tracker */
size_t cnt; /**< number of data objects */
orte_pointer_array_t *data; /**< Contiguous array of pointers to data objects */
} orte_gpr_notify_message_t;
OMPI_DECLSPEC OBJ_CLASS_DECLARATION(orte_gpr_notify_message_t);
@ -209,6 +216,13 @@ OMPI_DECLSPEC OBJ_CLASS_DECLARATION(orte_gpr_notify_message_t);
*/
typedef void (*orte_gpr_notify_cb_fn_t)(orte_gpr_notify_data_t *notify_data, void *user_tag);
/** Trigger callback function
* msg = message containing multiple blocks of data provided by the trigger
*
* user_tag = whatever tag data the user provided when filing the trigger
*/
typedef void (*orte_gpr_trigger_cb_fn_t)(orte_gpr_notify_message_t *msg, void *user_tag);
/** Structure for registering subscriptions
* A request to be notified when certain events occur, or when counters reach specified
* values, is registered on the registry via a subscription request. This structure
@ -226,7 +240,7 @@ typedef struct {
size_t cnt; /**< Number of values included */
orte_gpr_value_t **values; /**< Contiguous array of pointers to value objects
describing the data to be returned */
orte_gpr_notify_cb_fn_t cbfunc; /**< Function to be called with this data */
orte_gpr_notify_cb_fn_t cbfunc; /**< the callback function */
void *user_tag; /**< User-provided tag to be used in cbfunc */
} orte_gpr_subscription_t;
@ -246,6 +260,8 @@ typedef struct {
size_t cnt; /**< Number of values included */
orte_gpr_value_t **values; /**< Contiguous array of pointers to value objects
describing the objects to be monitored */
orte_gpr_trigger_cb_fn_t cbfunc; /**< the callback function */
void *user_tag; /**< User-provided tag to be used in cbfunc */
} orte_gpr_trigger_t;
OMPI_DECLSPEC OBJ_CLASS_DECLARATION(orte_gpr_trigger_t);

Просмотреть файл

@ -62,8 +62,14 @@ typedef struct {
OBJ_CLASS_DECLARATION(orte_gpr_proxy_subscriber_t);
#define ORTE_GPR_PROXY_MAX_SIZE INT32_MAX
#define ORTE_GPR_PROXY_BLOCK_SIZE 100
/*
 * Proxy-side tracking record for one trigger. Instances are created in
 * orte_gpr_proxy_enter_trigger and stored in orte_gpr_proxy_globals.triggers;
 * when a notify message carrying a trigger id arrives, the record's callback
 * is invoked with the whole message.
 */
typedef struct {
opal_object_t super; /**< Allows this to be an object */
orte_gpr_trigger_id_t id; /**< id of this trigger */
orte_gpr_trigger_cb_fn_t callback; /**< Function to be called for notification */
void *user_tag; /**< User-provided tag for callback function */
} orte_gpr_proxy_trigger_t;
/* declares the class descriptor so the type can be used with OBJ_NEW/OBJ_RELEASE */
OBJ_CLASS_DECLARATION(orte_gpr_proxy_trigger_t);
/*
@ -71,11 +77,10 @@ OBJ_CLASS_DECLARATION(orte_gpr_proxy_subscriber_t);
*/
typedef struct {
int debug;
size_t block_size;
size_t max_size;
orte_gpr_subscription_id_t num_subs;
orte_pointer_array_t *subscriptions;
orte_gpr_trigger_id_t trig_cntr;
orte_gpr_trigger_id_t num_trigs;
orte_pointer_array_t *triggers;
opal_mutex_t mutex;
bool compound_cmd_mode;
orte_buffer_t *compound_cmd;
@ -209,6 +214,9 @@ int
orte_gpr_proxy_enter_trigger(size_t cnt, orte_gpr_trigger_t **triggers);
int
orte_gpr_proxy_remove_trigger(orte_gpr_trigger_id_t id);
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif

Просмотреть файл

@ -140,6 +140,28 @@ OBJ_CLASS_INSTANCE(
orte_gpr_proxy_subscriber_destructor); /* destructor */
/* TRIGGER */
/* constructor - leaves a new trigger tracker in a known, empty state */
static void orte_gpr_proxy_trigger_construct(orte_gpr_proxy_trigger_t* trig)
{
    /* id, callback and tag are filled in when the trigger is entered */
    trig->id = 0;
    trig->user_tag = NULL;
    trig->callback = NULL;
}
/* destructor - used to free any resources held by instance */
static void orte_gpr_proxy_trigger_destructor(orte_gpr_proxy_trigger_t* req)
{
/* intentionally empty: the constructor allocates nothing, and the callback
 * and user_tag fields only hold pointer values copied from the caller's
 * trigger description, so there is nothing to release here
 * NOTE(review): assumes user_tag stays owned by the caller - confirm
 */
}
/* define instance of opal_class_t */
/* ties the trigger tracker type to its constructor/destructor pair so that
 * objects created with OBJ_NEW(orte_gpr_proxy_trigger_t) start out initialized
 */
OBJ_CLASS_INSTANCE(
orte_gpr_proxy_trigger_t, /* type name */
opal_object_t, /* parent "class" name */
orte_gpr_proxy_trigger_construct, /* constructor */
orte_gpr_proxy_trigger_destructor); /* destructor */
/*
* Open the component
*/
@ -155,16 +177,6 @@ int orte_gpr_proxy_open(void)
orte_gpr_proxy_globals.debug = false;
}
id = mca_base_param_register_int("gpr", "proxy", "maxsize", NULL,
ORTE_GPR_PROXY_MAX_SIZE);
mca_base_param_lookup_int(id, &tmp);
orte_gpr_proxy_globals.max_size = (size_t)tmp;
id = mca_base_param_register_int("gpr", "proxy", "blocksize", NULL,
ORTE_GPR_PROXY_BLOCK_SIZE);
mca_base_param_lookup_int(id, &tmp);
orte_gpr_proxy_globals.block_size = (size_t)tmp;
return ORTE_SUCCESS;
}
@ -228,15 +240,24 @@ orte_gpr_proxy_component_init(bool *allow_multi_user_threads, bool *have_hidden_
orte_gpr_proxy_globals.compound_cmd = NULL;
/* initialize the subscription tracker */
if (ORTE_SUCCESS != orte_pointer_array_init(&(orte_gpr_proxy_globals.subscriptions),
orte_gpr_proxy_globals.block_size,
orte_gpr_proxy_globals.max_size,
orte_gpr_proxy_globals.block_size)) {
if (ORTE_SUCCESS != (ret = orte_pointer_array_init(&(orte_gpr_proxy_globals.subscriptions),
orte_gpr_array_block_size,
orte_gpr_array_max_size,
orte_gpr_array_block_size))) {
ORTE_ERROR_LOG(ret);
return NULL;
}
orte_gpr_proxy_globals.num_subs = 0;
/* initialize the trigger counter */
orte_gpr_proxy_globals.trig_cntr = 0;
if (ORTE_SUCCESS != (ret = orte_pointer_array_init(&(orte_gpr_proxy_globals.triggers),
orte_gpr_array_block_size,
orte_gpr_array_max_size,
orte_gpr_array_block_size))) {
ORTE_ERROR_LOG(ret);
return NULL;
}
orte_gpr_proxy_globals.num_trigs = 0;
initialized = true;
return &orte_gpr_proxy;
@ -286,11 +307,12 @@ void orte_gpr_proxy_notify_recv(int status, orte_process_name_t* sender,
void* cbdata)
{
orte_gpr_cmd_flag_t command;
orte_gpr_notify_message_t *msg;
orte_gpr_notify_data_t **data;
orte_gpr_proxy_subscriber_t *sub;
size_t n;
orte_gpr_proxy_trigger_t *trig;
size_t i, n;
int rc;
size_t cnt, i;
if (orte_gpr_proxy_globals.debug) {
opal_output(0, "[%lu,%lu,%lu] gpr proxy: received trigger message",
@ -308,44 +330,73 @@ void orte_gpr_proxy_notify_recv(int status, orte_process_name_t* sender,
goto RETURN_ERROR;
}
msg = OBJ_NEW(orte_gpr_notify_message_t);
if (NULL == msg) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
goto RETURN_ERROR;
}
n = 1;
if (ORTE_SUCCESS != (rc = orte_dps.unpack(buffer, &cnt, &n, ORTE_SIZE))) {
if (ORTE_SUCCESS != (rc = orte_dps.unpack(buffer, &msg, &n, ORTE_GPR_NOTIFY_MSG))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(msg);
goto RETURN_ERROR;
}
if (cnt > 0) {
/* allocate space for the array */
data = (orte_gpr_notify_data_t**)malloc(cnt * sizeof(orte_gpr_notify_data_t*));
if (NULL == data) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
goto RETURN_ERROR;
}
if (ORTE_SUCCESS != (rc = orte_dps.unpack(buffer, data, &cnt, ORTE_GPR_NOTIFY_DATA))) {
ORTE_ERROR_LOG(rc);
goto RETURN_ERROR;
}
/* if the message trigger id is valid (i.e., it is set to
 * something other than ORTE_GPR_TRIGGER_ID_MAX), then this
 * is an aggregated message intended for a single receiver.
 * In that case, look up the associated TRIGGER id and pass
 * the entire message to that receiver.
 */
if (ORTE_GPR_TRIGGER_ID_MAX > msg->id) {
    /* only index the tracker when the id lies inside the array - an id
     * taken from a damaged message must not cause an out-of-bounds read
     */
    trig = NULL;
    if (msg->id < (orte_gpr_proxy_globals.triggers)->size) {
        trig = (orte_gpr_proxy_globals.triggers)->addr[msg->id];
    }
    if (NULL == trig) {
        ORTE_ERROR_LOG(ORTE_ERR_GPR_DATA_CORRUPT);
    } else {
        /* hand over the tag stored with the trigger itself; "sub" is
         * only assigned later, in the per-subscription loop, so it must
         * not be dereferenced here
         */
        trig->callback(msg, trig->user_tag);
        /* the message asks us to drop this trigger from the local
         * tracker (removing an empty slot was a no-op anyway, so this
         * only needs to happen when a trigger was actually found)
         */
        if (msg->remove) {
            if (ORTE_SUCCESS != (rc = orte_gpr_proxy_remove_trigger(msg->id))) {
                ORTE_ERROR_LOG(rc);
            }
        }
    }
    OBJ_RELEASE(msg);
    goto RETURN_ERROR;
}
/* if the message trigger id was NOT valid, then we split the
* message into its component datagrams and send each of them
* separately to its respective subscriber.
*/
for (i=0; i < cnt; i++) {
if (msg->cnt > 0) {
data = (orte_gpr_notify_data_t**)(msg->data)->addr;
for (i=0; i < msg->cnt; i++) {
/* for speed purposes, we take advantage here of
* our knowledge on how this pointer array was
* constructed - we know that it is contiguous
* and that there are no NULL gaps in it.
*/
/* process request */
if (data[i]->id > orte_gpr_proxy_globals.num_subs) {
ORTE_ERROR_LOG(ORTE_ERR_GPR_DATA_CORRUPT);
OBJ_RELEASE(data[i]);
continue;
}
sub = (orte_gpr_proxy_globals.subscriptions)->addr[data[i]->id];
if (NULL == sub) {
ORTE_ERROR_LOG(ORTE_ERR_GPR_DATA_CORRUPT);
ORTE_ERROR_LOG(ORTE_ERR_GPR_DATA_CORRUPT);
} else {
sub->callback(data[i], sub->user_tag);
sub->callback(data[i], sub->user_tag);
}
if (data[i]->remove) {
if (ORTE_SUCCESS != (rc = orte_gpr_proxy_remove_subscription(data[i]->id))) {
ORTE_ERROR_LOG(rc);
}
}
OBJ_RELEASE(data[i]);
}
/* release data */
free(data);
OBJ_RELEASE(msg);
}
RETURN_ERROR:

Просмотреть файл

@ -54,7 +54,7 @@ orte_gpr_proxy_enter_subscription(size_t cnt, orte_gpr_subscription_t **subscrip
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
sub->id = (orte_gpr_subscription_id_t)id;
sub->id = orte_gpr_proxy_globals.num_subs;
subscriptions[i]->id = sub->id;
(orte_gpr_proxy_globals.num_subs)++;
}
@ -66,16 +66,35 @@ orte_gpr_proxy_enter_subscription(size_t cnt, orte_gpr_subscription_t **subscrip
int
orte_gpr_proxy_enter_trigger(size_t cnt, orte_gpr_trigger_t **trigs)
{
size_t i;
orte_gpr_proxy_trigger_t *trig;
size_t i, id;
for (i=0; i < cnt; i++) {
if (ORTE_GPR_TRIGGER_ID_MAX-1 > orte_gpr_proxy_globals.trig_cntr) {
trigs[i]->id = orte_gpr_proxy_globals.trig_cntr;
(orte_gpr_proxy_globals.trig_cntr)++;
} else {
trig = OBJ_NEW(orte_gpr_proxy_trigger_t);
if (NULL == trig) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
/* ensure that the proper routing flag is set
* in the action field to match the trigger callback
* function
*/
if (NULL != trigs[i]->cbfunc) {
trigs[i]->action = trigs[i]->action |
ORTE_GPR_TRIG_ROUTE_DATA_THRU_ME;
} else {
trigs[i]->action = trigs[i]->action &
~ORTE_GPR_TRIG_ROUTE_DATA_THRU_ME;
}
trig->callback = trigs[i]->cbfunc;
trig->user_tag = trigs[i]->user_tag;
if (0 > orte_pointer_array_add(&id, orte_gpr_proxy_globals.triggers, trig)) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
trig->id = orte_gpr_proxy_globals.num_trigs;
trigs[i]->id = trig->id;
(orte_gpr_proxy_globals.num_trigs)++;
}
return ORTE_SUCCESS;
@ -93,3 +112,14 @@ orte_gpr_proxy_remove_subscription(orte_gpr_subscription_id_t id)
return ORTE_SUCCESS;
}
/*
 * Remove a trigger from the proxy's local tracker.
 *
 * Releases the tracking object stored at slot "id" of
 * orte_gpr_proxy_globals.triggers and clears that slot. An already-empty
 * slot is not treated as an error.
 *
 * id - index of the trigger in the local tracker array
 *
 * Returns ORTE_SUCCESS, or ORTE_ERR_BAD_PARAM when id lies outside the
 * tracker array (previously this case indexed past the end of the array).
 */
int
orte_gpr_proxy_remove_trigger(orte_gpr_trigger_id_t id)
{
    orte_pointer_array_t *tracker = orte_gpr_proxy_globals.triggers;
    orte_gpr_proxy_trigger_t *trig;

    /* refuse ids that do not correspond to any slot of the tracker */
    if (id >= tracker->size) {
        ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
        return ORTE_ERR_BAD_PARAM;
    }

    trig = (orte_gpr_proxy_trigger_t*)tracker->addr[id];
    if (NULL != trig) {
        /* clear the slot first so the tracker never exposes a pointer
         * to an object that has already been released
         */
        orte_pointer_array_set_item(tracker, (size_t)id, NULL);
        OBJ_RELEASE(trig);
    }

    return ORTE_SUCCESS;
}

Просмотреть файл

@ -267,6 +267,13 @@ int orte_gpr_proxy_cancel_trigger(orte_gpr_trigger_id_t trig)
OPAL_THREAD_LOCK(&orte_gpr_proxy_globals.mutex);
/* remove the specified trigger from the local tracker */
if (ORTE_SUCCESS != (rc = orte_gpr_proxy_remove_trigger(trig))) {
ORTE_ERROR_LOG(rc);
OPAL_THREAD_UNLOCK(&orte_gpr_proxy_globals.mutex);
return rc;
}
/* if the compound cmd mode is on, pack the command into that buffer
* and return
*/

Просмотреть файл

@ -48,8 +48,8 @@ int orte_gpr_replica_preallocate_segment(char *name, size_t num_slots)
}
rc = orte_pointer_array_init(&(seg->containers), num_slots,
orte_gpr_replica_globals.max_size,
orte_gpr_replica_globals.block_size);
orte_gpr_array_max_size,
orte_gpr_array_block_size);
OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex);

Просмотреть файл

@ -41,10 +41,6 @@ int orte_gpr_replica_remote_notify(orte_process_name_t *recipient,
orte_gpr_cmd_flag_t command;
int rc;
if (orte_gpr_replica_globals.debug) {
opal_output(0, "sending trigger message");
}
command = ORTE_GPR_NOTIFY_CMD;
OBJ_CONSTRUCT(&buffer, orte_buffer_t);
@ -54,19 +50,11 @@ int orte_gpr_replica_remote_notify(orte_process_name_t *recipient,
return rc;
}
if (ORTE_SUCCESS != (rc = orte_dps.pack(&buffer, &(message->cnt), 1, ORTE_SIZE))) {
if (ORTE_SUCCESS != (rc = orte_dps.pack(&buffer, &message, 1, ORTE_GPR_NOTIFY_MSG))) {
ORTE_ERROR_LOG(rc);
return rc;
}
if(message->cnt > 0) {
if (ORTE_SUCCESS != (rc = orte_dps.pack(&buffer, message->data,
message->cnt, ORTE_GPR_NOTIFY_DATA))) {
ORTE_ERROR_LOG(rc);
return rc;
}
}
OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex);
if (0 > orte_rml.send_buffer(recipient, &buffer, ORTE_RML_TAG_GPR_NOTIFY, 0)) {

Просмотреть файл

@ -19,12 +19,13 @@ include $(top_ompi_srcdir)/config/Makefile.options
noinst_LTLIBRARIES = libmca_gpr_replica_fn.la
libmca_gpr_replica_fn_la_SOURCES = \
gpr_replica_fn.h \
gpr_replica_arithmetic_ops_fn.c \
gpr_replica_cleanup_fn.c \
gpr_replica_del_index_fn.c \
gpr_replica_dict_fn.c \
gpr_replica_dump_fn.c \
gpr_replica_local_trig_ops_fn.c \
gpr_replica_messaging_fn.c \
gpr_replica_arithmetic_ops_fn.c \
gpr_replica_put_get_fn.c \
gpr_replica_segment_fn.c \
gpr_replica_subscribe_fn.c \

Просмотреть файл

@ -46,6 +46,8 @@ void orte_gpr_replica_dump_itagval_value(orte_buffer_t *buffer,
static void orte_gpr_replica_dump_trigger(orte_buffer_t *buffer, size_t cnt,
orte_gpr_replica_trigger_t *trig);
static void orte_gpr_replica_dump_subscription(orte_buffer_t *buffer,
orte_gpr_replica_subscription_t *sub);
int orte_gpr_replica_dump_all_fn(orte_buffer_t *buffer)
{
@ -218,6 +220,7 @@ int orte_gpr_replica_dump_callbacks_fn(orte_buffer_t *buffer)
}
orte_gpr_replica_dump_load_string(buffer, &tmp_out);
orte_gpr_base_dump_notify_msg(buffer, cb->message);
i++;
}
}
@ -347,6 +350,7 @@ static void orte_gpr_replica_dump_trigger(orte_buffer_t *buffer, size_t cnt,
size_t i, j;
orte_gpr_replica_counter_t **cntr;
orte_gpr_replica_subscription_t **subs;
orte_gpr_replica_trigger_requestor_t **attached;
tmp_out = (char*)malloc(1000);
if (NULL == tmp_out) {
@ -357,23 +361,72 @@ static void orte_gpr_replica_dump_trigger(orte_buffer_t *buffer, size_t cnt,
sprintf(tmp_out, "\nData for trigger %lu", (unsigned long) trig->index);
orte_gpr_replica_dump_load_string(buffer, &tmp_out);
if (NULL == trig->name) {
sprintf(tmp_out, "\tNOT a named trigger");
} else {
sprintf(tmp_out, "\ttrigger name: %s", trig->name);
}
orte_gpr_replica_dump_load_string(buffer, &tmp_out);
if (0 < trig->num_attached) {
sprintf(tmp_out, "\t%lu requestors attached to this trigger",
(unsigned long) trig->num_attached);
} else {
sprintf(tmp_out, "\tNo requestors attached to this trigger");
}
orte_gpr_replica_dump_load_string(buffer, &tmp_out);
attached = (orte_gpr_replica_trigger_requestor_t**)
(trig->attached)->addr;
for (i=0, j=0; j < trig->num_attached &&
i < (trig->attached)->size; i++) {
if (NULL != attached[i]) {
j++;
if (NULL == attached[i]->requestor) {
sprintf(tmp_out, "\t\tRequestor %lu: LOCAL@idtag %lu",
(unsigned long)j, (unsigned long)attached[i]->idtag);
} else {
sprintf(tmp_out, "\t\tRequestor %lu: [%lu,%lu,%lu]@idtag %lu",
(unsigned long)j, ORTE_NAME_ARGS(attached[i]->requestor),
(unsigned long)attached[i]->idtag);
}
orte_gpr_replica_dump_load_string(buffer, &tmp_out);
}
}
if (NULL == trig->master) {
sprintf(tmp_out, "\tNO MASTER registered");
} else {
sprintf(tmp_out, "\tTRIGGER MASTER: [%lu,%lu,%lu]@idtag %lu",
ORTE_NAME_ARGS(trig->master->requestor),
(unsigned long)trig->master->idtag);
}
orte_gpr_replica_dump_load_string(buffer, &tmp_out);
if (ORTE_GPR_TRIG_ONE_SHOT & trig->action) {
sprintf(tmp_out, "\t\tORTE_GPR_TRIG_ONE_SHOT");
sprintf(tmp_out, "\tORTE_GPR_TRIG_ONE_SHOT");
orte_gpr_replica_dump_load_string(buffer, &tmp_out);
}
if (ORTE_GPR_TRIG_AT_LEVEL & trig->action) {
sprintf(tmp_out, "\t\tORTE_GPR_TRIG_AT_LEVEL");
sprintf(tmp_out, "\tORTE_GPR_TRIG_AT_LEVEL");
orte_gpr_replica_dump_load_string(buffer, &tmp_out);
}
if (ORTE_GPR_TRIG_CMP_LEVELS & trig->action) {
sprintf(tmp_out, "\t\tORTE_GPR_TRIG_CMP_LEVELS");
sprintf(tmp_out, "\tORTE_GPR_TRIG_CMP_LEVELS");
orte_gpr_replica_dump_load_string(buffer, &tmp_out);
}
if (ORTE_GPR_TRIG_INCLUDE_DATA & trig->action) {
sprintf(tmp_out, "\t\tORTE_GPR_TRIG_INCLUDE_DATA");
if (ORTE_GPR_TRIG_INCLUDE_TRIG_CNTRS & trig->action) {
sprintf(tmp_out, "\tORTE_GPR_TRIG_INCLUDE_TRIG_CNTRS");
orte_gpr_replica_dump_load_string(buffer, &tmp_out);
}
if (trig->one_shot_fired) {
sprintf(tmp_out, "\tONE SHOT HAS FIRED");
} else {
sprintf(tmp_out, "\tONE SHOT HAS NOT FIRED");
}
orte_gpr_replica_dump_load_string(buffer, &tmp_out);
if (0 < trig->num_counters) {
if (ORTE_GPR_TRIG_AT_LEVEL & trig->action) {
sprintf(tmp_out, "\tTrigger monitoring %lu counters for level",
@ -415,10 +468,7 @@ static void orte_gpr_replica_dump_trigger(orte_buffer_t *buffer, size_t cnt,
i < (trig->subscriptions)->size; i++) {
if (NULL != subs[i]) {
j++;
sprintf(tmp_out, "\t\tSubscription %lu name %s",
(unsigned long) subs[i]->index,
subs[i]->name);
orte_gpr_replica_dump_load_string(buffer, &tmp_out);
orte_gpr_replica_dump_subscription(buffer, subs[i]);
}
}
}
@ -429,11 +479,9 @@ static void orte_gpr_replica_dump_trigger(orte_buffer_t *buffer, size_t cnt,
int orte_gpr_replica_dump_subscriptions_fn(orte_buffer_t *buffer)
{
char *tmp_out, *token, *tmp;
size_t i, j, k, m, n, p;
char *tmp_out, *tmp;
size_t i, m;
orte_gpr_replica_subscription_t **subs;
orte_gpr_replica_requestor_t **reqs;
orte_gpr_replica_ivalue_t **ivals;
tmp_out = (char*)malloc(1000);
if (NULL == tmp_out) {
@ -449,180 +497,210 @@ int orte_gpr_replica_dump_subscriptions_fn(orte_buffer_t *buffer)
sprintf(tmp_out, "Number of subscriptions: %lu\n", (unsigned long) orte_gpr_replica.num_subs);
orte_gpr_replica_dump_load_string(buffer, &tmp);
/* dump the trigger info for the registry */
/* dump the subscription info for the registry */
for (i=0, m=0; m < orte_gpr_replica.num_subs &&
i < (orte_gpr_replica.subscriptions)->size; i++) {
if (NULL != subs[i]) {
m++;
sprintf(tmp_out, "Info for Subscription %lu named %s",
(unsigned long) subs[i]->index, subs[i]->name);
orte_gpr_replica_dump_load_string(buffer, &tmp);
if (subs[i]->active) {
sprintf(tmp_out, "\tSubscription ACTIVE");
orte_gpr_replica_dump_subscription(buffer, subs[i]);
}
}
free(tmp_out);
return ORTE_SUCCESS;
}
static void orte_gpr_replica_dump_subscription(orte_buffer_t *buffer,
orte_gpr_replica_subscription_t *sub)
{
char *tmp_out, *token, *tmp;
size_t i, j, k, m, n, p;
orte_gpr_replica_requestor_t **reqs;
orte_gpr_replica_ivalue_t **ivals;
/* scratch buffer reused for every line of output produced below */
tmp_out = (char*)malloc(1000);
if (NULL == tmp_out) {
    ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
    /* this helper is declared "static void", so it cannot return an
     * error code - log the failure and bail out
     */
    return;
}
tmp = tmp_out;
if (NULL == sub->name) {
sprintf(tmp, "\t\tSubscription %lu: UNNAMED",
(unsigned long) sub->index);
} else {
sprintf(tmp, "\t\tSubscription %lu name %s",
(unsigned long) sub->index,
sub->name);
}
orte_gpr_replica_dump_load_string(buffer, &tmp);
if (sub->active) {
sprintf(tmp_out, "\tSubscription ACTIVE");
} else {
sprintf(tmp_out, "\tSubscription INACTIVE");
}
orte_gpr_replica_dump_load_string(buffer, &tmp);
if (sub->cleanup) {
sprintf(tmp_out, "\tSubscription scheduled for cleanup");
} else {
sprintf(tmp_out, "\tSubscription NOT scheduled for cleanup");
}
orte_gpr_replica_dump_load_string(buffer, &tmp);
/* output recipient info */
sprintf(tmp_out, "\tList of requestors for this subscription:");
orte_gpr_replica_dump_load_string(buffer, &tmp);
reqs = (orte_gpr_replica_requestor_t**)(sub->requestors)->addr;
for (j=0, k=0; k < sub->num_requestors &&
j < (sub->requestors)->size; j++) {
if (NULL != reqs[j]) {
k++;
if (NULL == reqs[j]->requestor) {
sprintf(tmp_out, "\t\tRequestor: LOCAL @ subscription id %lu",
(unsigned long) reqs[j]->idtag);
} else {
sprintf(tmp_out, "\tSubscription INACTIVE");
sprintf(tmp_out, "\t\tRequestor: [%lu,%lu,%lu] @ subscription id %lu",
ORTE_NAME_ARGS(reqs[j]->requestor),
(unsigned long) reqs[j]->idtag);
}
orte_gpr_replica_dump_load_string(buffer, &tmp);
orte_gpr_replica_dump_load_string(buffer, &tmp_out);
}
}
sprintf(tmp_out, "\tActions:");
orte_gpr_replica_dump_load_string(buffer, &tmp_out);
if (ORTE_GPR_NOTIFY_VALUE_CHG & sub->action) {
sprintf(tmp_out, "\t\tORTE_GPR_NOTIFY_VALUE_CHG");
orte_gpr_replica_dump_load_string(buffer, &tmp_out);
} else if (ORTE_GPR_NOTIFY_VALUE_CHG_TO & sub->action) {
sprintf(tmp_out, "\t\tORTE_GPR_NOTIFY_VALUE_CHG_TO");
orte_gpr_replica_dump_load_string(buffer, &tmp_out);
} else if (ORTE_GPR_NOTIFY_VALUE_CHG_FRM & sub->action) {
sprintf(tmp_out, "\t\tORTE_GPR_NOTIFY_VALUE_CHG_FRM");
orte_gpr_replica_dump_load_string(buffer, &tmp_out);
}
if (ORTE_GPR_NOTIFY_DEL_ENTRY & sub->action) {
sprintf(tmp_out, "\t\tORTE_GPR_NOTIFY_DEL_ENTRY");
orte_gpr_replica_dump_load_string(buffer, &tmp_out);
}
if (ORTE_GPR_NOTIFY_ADD_ENTRY & sub->action) {
sprintf(tmp_out, "\t\tORTE_GPR_NOTIFY_ADD_ENTRY");
orte_gpr_replica_dump_load_string(buffer, &tmp_out);
}
if (ORTE_GPR_NOTIFY_PRE_EXISTING & sub->action) {
sprintf(tmp_out, "\t\tORTE_GPR_NOTIFY_PRE_EXISTING");
orte_gpr_replica_dump_load_string(buffer, &tmp_out);
}
if (ORTE_GPR_NOTIFY_STARTS_AFTER_TRIG & sub->action) {
sprintf(tmp_out, "\t\tORTE_GPR_NOTIFY_STARTS_AFTER_TRIG");
orte_gpr_replica_dump_load_string(buffer, &tmp_out);
}
if (ORTE_GPR_NOTIFY_DELETE_AFTER_TRIG & sub->action) {
sprintf(tmp_out, "\t\tORTE_GPR_NOTIFY_DELETE_AFTER_TRIG");
orte_gpr_replica_dump_load_string(buffer, &tmp_out);
}
sprintf(tmp_out, "\n\tData covered by this subscription");
orte_gpr_replica_dump_load_string(buffer, &tmp_out);
ivals = (orte_gpr_replica_ivalue_t**)(sub->values)->addr;
for (n=0, p=0; p < sub->num_values &&
n < (sub->values)->size; n++) {
if (NULL != ivals[n]) {
p++;
sprintf(tmp_out, "\t\tData on segment %s", (ivals[n]->seg)->name);
orte_gpr_replica_dump_load_string(buffer, &tmp_out);
/* output recipient info */
sprintf(tmp_out, "\tList of requestors for this subscription:");
orte_gpr_replica_dump_load_string(buffer, &tmp);
reqs = (orte_gpr_replica_requestor_t**)(subs[i]->requestors)->addr;
for (j=0, k=0; k < subs[i]->num_requestors &&
j < (subs[i]->requestors)->size; j++) {
if (NULL != reqs[j]) {
k++;
if (NULL == reqs[j]->requestor) {
sprintf(tmp_out, "\t\tRequestor: LOCAL @ subscription id %lu",
(unsigned long) reqs[j]->idtag);
} else {
sprintf(tmp_out, "\t\tRequestor: [%lu,%lu,%lu] @ subscription id %lu",
ORTE_NAME_ARGS(reqs[j]->requestor),
(unsigned long) reqs[j]->idtag);
k = (int)orte_value_array_get_size(&(ivals[n]->tokentags));
if (0 == k) {
sprintf(tmp_out, "\t\tNULL token (wildcard)");
orte_gpr_replica_dump_load_string(buffer, &tmp_out);
} else {
sprintf(tmp_out, "\t\tNumber of tokens: %lu",
(unsigned long) k);
orte_gpr_replica_dump_load_string(buffer, &tmp_out);
for (j=0; j < k; j++) {
if (ORTE_SUCCESS == orte_gpr_replica_dict_reverse_lookup(&token, ivals[n]->seg,
ORTE_VALUE_ARRAY_GET_ITEM(&(ivals[n]->tokentags), orte_gpr_replica_itag_t, j))) {
sprintf(tmp_out, "\t\t\tToken: %s", token);
orte_gpr_replica_dump_load_string(buffer, &tmp_out);
free(token);
}
}
}
sprintf(tmp_out, "\t\tToken addressing mode:\n");
orte_gpr_replica_dump_load_string(buffer, &tmp_out);
if (ORTE_GPR_TOKENS_NOT & ivals[n]->addr_mode) {
sprintf(tmp_out, "\t\t\tORTE_GPR_TOKENS_NOT\n");
orte_gpr_replica_dump_load_string(buffer, &tmp_out);
}
if (ORTE_GPR_TOKENS_AND & ivals[n]->addr_mode) {
sprintf(tmp_out, "\t\t\tORTE_GPR_TOKENS_AND\n");
orte_gpr_replica_dump_load_string(buffer, &tmp_out);
}
if (ORTE_GPR_TOKENS_OR & ivals[n]->addr_mode) {
sprintf(tmp_out, "\t\t\tORTE_GPR_TOKENS_OR\n");
orte_gpr_replica_dump_load_string(buffer, &tmp_out);
}
if (ORTE_GPR_TOKENS_XAND & ivals[n]->addr_mode) {
sprintf(tmp_out, "\t\t\tORTE_GPR_TOKENS_XAND\n");
orte_gpr_replica_dump_load_string(buffer, &tmp_out);
}
if (ORTE_GPR_TOKENS_XOR & ivals[n]->addr_mode) {
sprintf(tmp_out, "\t\t\tORTE_GPR_TOKENS_XOR\n");
orte_gpr_replica_dump_load_string(buffer, &tmp_out);
}
k = (int)orte_value_array_get_size(&(ivals[n]->keytags));
if (0 == k) {
sprintf(tmp_out, "\t\tNULL key (wildcard)");
orte_gpr_replica_dump_load_string(buffer, &tmp_out);
} else {
sprintf(tmp_out, "\t\tNumber of keys: %lu", (unsigned long) k);
orte_gpr_replica_dump_load_string(buffer, &tmp_out);
for (j=0; j < k; j++) {
if (ORTE_SUCCESS == orte_gpr_replica_dict_reverse_lookup(&token, ivals[n]->seg,
ORTE_VALUE_ARRAY_GET_ITEM(&(ivals[n]->keytags), orte_gpr_replica_itag_t, j))) {
sprintf(tmp_out, "\t\t\tKey: %s", token);
orte_gpr_replica_dump_load_string(buffer, &tmp_out);
free(token);
}
orte_gpr_replica_dump_load_string(buffer, &tmp_out);
}
}
sprintf(tmp_out, "\tActions:");
sprintf(tmp_out, "\t\tKey addressing mode:\n");
orte_gpr_replica_dump_load_string(buffer, &tmp_out);
if (ORTE_GPR_NOTIFY_VALUE_CHG & subs[i]->action) {
sprintf(tmp_out, "\t\tORTE_GPR_NOTIFY_VALUE_CHG");
orte_gpr_replica_dump_load_string(buffer, &tmp_out);
} else if (ORTE_GPR_NOTIFY_VALUE_CHG_TO & subs[i]->action) {
sprintf(tmp_out, "\t\tORTE_GPR_NOTIFY_VALUE_CHG_TO");
orte_gpr_replica_dump_load_string(buffer, &tmp_out);
} else if (ORTE_GPR_NOTIFY_VALUE_CHG_FRM & subs[i]->action) {
sprintf(tmp_out, "\t\tORTE_GPR_NOTIFY_VALUE_CHG_FRM");
orte_gpr_replica_dump_load_string(buffer, &tmp_out);
}
if (ORTE_GPR_NOTIFY_DEL_ENTRY & subs[i]->action) {
sprintf(tmp_out, "\t\tORTE_GPR_NOTIFY_DEL_ENTRY");
orte_gpr_replica_dump_load_string(buffer, &tmp_out);
}
if (ORTE_GPR_NOTIFY_ADD_ENTRY & subs[i]->action) {
sprintf(tmp_out, "\t\tORTE_GPR_NOTIFY_ADD_ENTRY");
orte_gpr_replica_dump_load_string(buffer, &tmp_out);
}
if (ORTE_GPR_NOTIFY_PRE_EXISTING & subs[i]->action) {
sprintf(tmp_out, "\t\tORTE_GPR_NOTIFY_PRE_EXISTING");
orte_gpr_replica_dump_load_string(buffer, &tmp_out);
}
if (ORTE_GPR_NOTIFY_STARTS_AFTER_TRIG & subs[i]->action) {
sprintf(tmp_out, "\t\tORTE_GPR_NOTIFY_STARTS_AFTER_TRIG");
orte_gpr_replica_dump_load_string(buffer, &tmp_out);
}
if (ORTE_GPR_NOTIFY_NO_DATA_WITH_TRIG & subs[i]->action) {
sprintf(tmp_out, "\t\tORTE_GPR_NOTIFY_NO_DATA_WITH_TRIG");
orte_gpr_replica_dump_load_string(buffer, &tmp_out);
}
if (ORTE_GPR_NOTIFY_DELETE_AFTER_TRIG & subs[i]->action) {
sprintf(tmp_out, "\t\tORTE_GPR_NOTIFY_DELETE_AFTER_TRIG");
orte_gpr_replica_dump_load_string(buffer, &tmp_out);
}
sprintf(tmp_out, "\n\tData covered by this subscription");
orte_gpr_replica_dump_load_string(buffer, &tmp_out);
ivals = (orte_gpr_replica_ivalue_t**)(subs[i]->values)->addr;
for (n=0, p=0; p < subs[i]->num_values &&
n < (subs[i]->values)->size; n++) {
if (NULL != ivals[n]) {
p++;
sprintf(tmp_out, "\t\tData on segment %s", (ivals[n]->seg)->name);
orte_gpr_replica_dump_load_string(buffer, &tmp_out);
k = (int)orte_value_array_get_size(&(ivals[n]->tokentags));
if (0 == k) {
sprintf(tmp_out, "\t\tNULL token (wildcard)");
orte_gpr_replica_dump_load_string(buffer, &tmp_out);
} else {
sprintf(tmp_out, "\t\tNumber of tokens: %lu",
(unsigned long) k);
orte_gpr_replica_dump_load_string(buffer, &tmp_out);
for (j=0; j < k; j++) {
if (ORTE_SUCCESS == orte_gpr_replica_dict_reverse_lookup(&token, ivals[n]->seg,
ORTE_VALUE_ARRAY_GET_ITEM(&(ivals[n]->tokentags), orte_gpr_replica_itag_t, j))) {
sprintf(tmp_out, "\t\t\tToken: %s", token);
orte_gpr_replica_dump_load_string(buffer, &tmp_out);
free(token);
}
}
}
sprintf(tmp_out, "\t\tToken addressing mode:\n");
orte_gpr_replica_dump_load_string(buffer, &tmp_out);
if (ORTE_GPR_TOKENS_NOT & ivals[n]->addr_mode) {
sprintf(tmp_out, "\t\t\tORTE_GPR_TOKENS_NOT\n");
orte_gpr_replica_dump_load_string(buffer, &tmp_out);
}
if (ORTE_GPR_TOKENS_AND & ivals[n]->addr_mode) {
sprintf(tmp_out, "\t\t\tORTE_GPR_TOKENS_AND\n");
orte_gpr_replica_dump_load_string(buffer, &tmp_out);
}
if (ORTE_GPR_TOKENS_OR & ivals[n]->addr_mode) {
sprintf(tmp_out, "\t\t\tORTE_GPR_TOKENS_OR\n");
orte_gpr_replica_dump_load_string(buffer, &tmp_out);
}
if (ORTE_GPR_TOKENS_XAND & ivals[n]->addr_mode) {
sprintf(tmp_out, "\t\t\tORTE_GPR_TOKENS_XAND\n");
orte_gpr_replica_dump_load_string(buffer, &tmp_out);
}
if (ORTE_GPR_TOKENS_XOR & ivals[n]->addr_mode) {
sprintf(tmp_out, "\t\t\tORTE_GPR_TOKENS_XOR\n");
orte_gpr_replica_dump_load_string(buffer, &tmp_out);
}
k = (int)orte_value_array_get_size(&(ivals[n]->keytags));
if (0 == k) {
sprintf(tmp_out, "\t\tNULL key (wildcard)");
orte_gpr_replica_dump_load_string(buffer, &tmp_out);
} else {
sprintf(tmp_out, "\t\tNumber of keys: %lu", (unsigned long) k);
orte_gpr_replica_dump_load_string(buffer, &tmp_out);
for (j=0; j < k; j++) {
if (ORTE_SUCCESS == orte_gpr_replica_dict_reverse_lookup(&token, ivals[n]->seg,
ORTE_VALUE_ARRAY_GET_ITEM(&(ivals[n]->keytags), orte_gpr_replica_itag_t, j))) {
sprintf(tmp_out, "\t\t\tKey: %s", token);
orte_gpr_replica_dump_load_string(buffer, &tmp_out);
free(token);
}
}
}
sprintf(tmp_out, "\t\tKey addressing mode:\n");
orte_gpr_replica_dump_load_string(buffer, &tmp_out);
if (ORTE_GPR_KEYS_NOT & ivals[n]->addr_mode) {
sprintf(tmp_out, "\t\t\tORTE_GPR_KEYS_NOT\n");
orte_gpr_replica_dump_load_string(buffer, &tmp_out);
}
if (ORTE_GPR_KEYS_AND & ivals[n]->addr_mode) {
sprintf(tmp_out, "\t\t\tORTE_GPR_KEYS_AND\n");
orte_gpr_replica_dump_load_string(buffer, &tmp_out);
}
if (ORTE_GPR_KEYS_OR & ivals[n]->addr_mode) {
sprintf(tmp_out, "\t\t\tORTE_GPR_KEYS_OR\n");
orte_gpr_replica_dump_load_string(buffer, &tmp_out);
}
if (ORTE_GPR_KEYS_XAND & ivals[n]->addr_mode) {
sprintf(tmp_out, "\t\t\tORTE_GPR_KEYS_XAND\n");
orte_gpr_replica_dump_load_string(buffer, &tmp_out);
}
if (ORTE_GPR_KEYS_XOR & ivals[n]->addr_mode) {
sprintf(tmp_out, "\t\t\tORTE_GPR_KEYS_XOR\n");
orte_gpr_replica_dump_load_string(buffer, &tmp_out);
}
} /* if ivals[n] not NULL */
} /* for n */
} /* if subs[i] not NULL */
} /* for i */
if (ORTE_GPR_KEYS_NOT & ivals[n]->addr_mode) {
sprintf(tmp_out, "\t\t\tORTE_GPR_KEYS_NOT\n");
orte_gpr_replica_dump_load_string(buffer, &tmp_out);
}
if (ORTE_GPR_KEYS_AND & ivals[n]->addr_mode) {
sprintf(tmp_out, "\t\t\tORTE_GPR_KEYS_AND\n");
orte_gpr_replica_dump_load_string(buffer, &tmp_out);
}
if (ORTE_GPR_KEYS_OR & ivals[n]->addr_mode) {
sprintf(tmp_out, "\t\t\tORTE_GPR_KEYS_OR\n");
orte_gpr_replica_dump_load_string(buffer, &tmp_out);
}
if (ORTE_GPR_KEYS_XAND & ivals[n]->addr_mode) {
sprintf(tmp_out, "\t\t\tORTE_GPR_KEYS_XAND\n");
orte_gpr_replica_dump_load_string(buffer, &tmp_out);
}
if (ORTE_GPR_KEYS_XOR & ivals[n]->addr_mode) {
sprintf(tmp_out, "\t\t\tORTE_GPR_KEYS_XOR\n");
orte_gpr_replica_dump_load_string(buffer, &tmp_out);
}
} /* if ivals[n] not NULL */
} /* for n */
free(tmp_out);
return ORTE_SUCCESS;
return;
}

Просмотреть файл

@ -201,6 +201,10 @@ int orte_gpr_replica_enter_local_subscription(size_t cnt, orte_gpr_subscription_
int orte_gpr_replica_enter_local_trigger(size_t cnt, orte_gpr_trigger_t **trigs);
int orte_gpr_replica_remove_local_subscription(orte_gpr_subscription_id_t id);
int orte_gpr_proxy_remove_local_trigger(orte_gpr_trigger_id_t id);
int orte_gpr_replica_record_action(orte_gpr_replica_segment_t *seg,
orte_gpr_replica_container_t *cptr,
orte_gpr_replica_itagval_t *iptr,
@ -241,16 +245,17 @@ int
orte_gpr_replica_remove_trigger(orte_process_name_t *requestor,
orte_gpr_trigger_id_t id);
int orte_gpr_replica_register_callback(orte_gpr_replica_subscription_t *sub,
int orte_gpr_replica_register_callback(orte_gpr_replica_trigger_t *trig,
orte_gpr_replica_subscription_t *sub,
orte_gpr_value_t *value);
/**
 * Locate the pending callback addressed to a recipient, creating one if
 * none is pending yet.
 *
 * @param cbptr     Receives the matching or newly created callback object
 *                  when the call succeeds.
 * @param recipient Name of the process the callback is addressed to. NULL is
 *                  accepted; callbacks with a NULL requestor are treated as
 *                  local callbacks when the callback list is processed.
 * @return ORTE_SUCCESS, or an ORTE error code if the callback could not be
 *         created.
 */
int orte_gpr_replica_define_callback(orte_gpr_replica_callbacks_t **cbptr,
orte_process_name_t *recipient);
int orte_gpr_replica_process_callbacks(void);
int orte_gpr_replica_purge_subscriptions(orte_process_name_t *proc);
int orte_gpr_replica_add_values_from_registry(orte_gpr_notify_message_t *msg,
orte_gpr_replica_subscription_t *sptr);
int orte_gpr_replica_store_value_in_msg(orte_gpr_subscription_id_t id,
orte_gpr_notify_message_t *msg,
size_t cnt,

Просмотреть файл

@ -39,24 +39,19 @@
#include "mca/gpr/replica/communications/gpr_replica_comm.h"
#include "gpr_replica_fn.h"
static int orte_gpr_replica_add_value_to_datagram(orte_gpr_notify_data_t *data,
size_t cnt, orte_gpr_value_t **values);
int orte_gpr_replica_process_callbacks(void)
{
orte_gpr_replica_callbacks_t *cb;
orte_gpr_notify_data_t **data;
orte_gpr_replica_trigger_t **trigs;
orte_gpr_replica_local_trigger_t **local_trigs;
orte_gpr_replica_subscription_t **subs;
orte_gpr_replica_local_subscriber_t **local_subs;
orte_gpr_replica_requestor_t **reqs;
orte_gpr_replica_local_subscriber_t *local_sub;
size_t i, j, k, m;
bool processed;
int rc;
if (orte_gpr_replica_globals.debug) {
opal_output(0, "gpr replica: process_callbacks entered");
}
/* check and set flag indicating callbacks being processed */
if (orte_gpr_replica.processing_callbacks) {
return ORTE_SUCCESS;
@ -64,49 +59,81 @@ int orte_gpr_replica_process_callbacks(void)
orte_gpr_replica.processing_callbacks = true;
while (NULL != (cb = (orte_gpr_replica_callbacks_t*)opal_list_remove_last(&orte_gpr_replica.callbacks))) {
if (NULL == cb->requestor) { /* local callback */
if (orte_gpr_replica_globals.debug) {
opal_output(0, "process_callbacks: local");
}
if (NULL == cb->requestor) { /* local callback */
/* each callback corresponds to a specific requestor
* The message in the callback consists of at least one (and can
* be more) "datagrams" intended for that requestor, each of which
* is slated to be returned to a specific
* subscription that corresponds to a specific callback
* function on the requestor.
*
* Since this requestor is "local", we simply execute
* the callbacks ourself.
*/
data = (orte_gpr_notify_data_t**)((cb->message)->data);
/* we first have to check the trigger id in the message. If that
* field is set to a valid value (i.e., one other than
* ORTE_GPR_TRIGGER_ID_MAX), then the message is intended to be
* sent as a single block to that trigger's callback function.
*/
if (ORTE_GPR_TRIGGER_ID_MAX > (cb->message)->id) {
/* use the local trigger callback */
local_trigs = (orte_gpr_replica_local_trigger_t**)
(orte_gpr_replica_globals.local_triggers)->addr;
for (i=0, j=0; j < orte_gpr_replica_globals.num_local_trigs &&
i < (orte_gpr_replica_globals.local_triggers)->size; i++) {
if (NULL != local_trigs[i]) {
j++;
if ((cb->message)->id == local_trigs[i]->id) {
OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex);
local_trigs[i]->callback(cb->message, local_trigs[i]->user_tag);
OPAL_THREAD_LOCK(&orte_gpr_replica_globals.mutex);
goto CLEANUP;
}
}
}
/* get here if the trigger could not be found */
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
goto CLEANUP;
}
/* get here if the trigger id indicated that this was NOT
* intended for a trigger callback - i.e., the message should
* be broken into its component parts and delivered separately
* to the indicated subscribers
*/
data = (orte_gpr_notify_data_t**)((cb->message)->data)->addr;
for (i=0; i < (cb->message)->cnt; i++) {
/* for each datagram in the message, we need to lookup
* the associated subscription id to find the correct
* callback function. This subscription id is in the
* data object itself, and references the local_subscriptions
* array of objects.
* callback function.
*/
local_sub = (orte_gpr_replica_local_subscriber_t*)
(orte_gpr_replica_globals.local_subscriptions)->addr[data[i]->id];
if (NULL == local_sub) { /* this subscription has been deleted - error */
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
goto CLEANUP;
local_subs = (orte_gpr_replica_local_subscriber_t**)
(orte_gpr_replica_globals.local_subscriptions)->addr;
processed = false;
for (j=0, k=0; !processed &&
k < orte_gpr_replica_globals.num_local_subs &&
j < (orte_gpr_replica_globals.local_subscriptions)->size; j++) {
if (NULL != local_subs[j]) {
k++;
if (data[i]->id == local_subs[j]->id) {
OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex);
local_subs[j]->callback(data[i], local_subs[j]->user_tag);
OPAL_THREAD_LOCK(&orte_gpr_replica_globals.mutex);
processed = true;
}
}
}
/* get here and not processed => not found */
if (!processed) {
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
}
OPAL_THREAD_UNLOCK(&orte_gpr_replica_globals.mutex);
local_sub->callback(data[i], local_sub->user_tag);
OPAL_THREAD_LOCK(&orte_gpr_replica_globals.mutex);
}
} else { /* remote request - send messages back */
if (orte_gpr_replica_globals.debug) {
opal_output(0, "process_callbacks: remote to [%lu,%lu,%lu]",
ORTE_NAME_ARGS(cb->requestor));
}
orte_gpr_replica_remote_notify(cb->requestor, cb->message);
} else { /* remote request - send messages back */
orte_gpr_replica_remote_notify(cb->requestor, cb->message);
}
CLEANUP:
OBJ_RELEASE(cb);
OBJ_RELEASE(cb);
}
/* cleanup any one-shot triggers that fired */
@ -158,7 +185,8 @@ CLEANUP:
int orte_gpr_replica_register_callback(orte_gpr_replica_subscription_t *sub,
int orte_gpr_replica_register_callback(orte_gpr_replica_trigger_t *trig,
orte_gpr_replica_subscription_t *sub,
orte_gpr_value_t *value)
{
orte_gpr_replica_callbacks_t *cb;
@ -236,6 +264,11 @@ int orte_gpr_replica_register_callback(orte_gpr_replica_subscription_t *sub,
for (k=0; k < interim; k++) {
values[k+cnt] = vals[k];
}
/* release the array of pointers - the pointers themselves
* will remain "alive" in the values array to be released
* later
*/
free(vals);
/* update the count */
cnt += interim;
}
@ -259,336 +292,218 @@ int orte_gpr_replica_register_callback(orte_gpr_replica_subscription_t *sub,
* already been scheduled to that destination - if so, we piggyback
* another datagram onto it to minimize communication costs.
*/
reqs = (orte_gpr_replica_requestor_t**)(sub->requestors)->addr;
for (i=0, j=0; j < sub->num_requestors &&
i < (sub->requestors)->size; i++) {
if (NULL != reqs[i]) {
j++;
/* see if a callback has already been registered for this process */
for (cb = (orte_gpr_replica_callbacks_t*)opal_list_get_first(&(orte_gpr_replica.callbacks));
cb != (orte_gpr_replica_callbacks_t*)opal_list_get_end(&(orte_gpr_replica.callbacks));
cb = (orte_gpr_replica_callbacks_t*)opal_list_get_next(cb)) {
if ((NULL == reqs[i]->requestor && NULL == cb->requestor) ||
((NULL != reqs[i]->requestor && NULL != cb->requestor) &&
(0 == orte_ns.compare(ORTE_NS_CMP_ALL,
reqs[i]->requestor,
cb->requestor)))) {
/* okay, a callback has been registered to send data to this
* process - add to that message
*/
goto PROCESS;
}
}
/* this is going to somebody new - create a new callback
* for this requestor
*/
cb = OBJ_NEW(orte_gpr_replica_callbacks_t);
if (NULL == cb) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
rc = ORTE_ERR_OUT_OF_RESOURCE;
goto CLEANUP;
}
if (NULL == reqs[i]->requestor) {
cb->requestor = NULL;
} else {
if (ORTE_SUCCESS != (rc = orte_ns.copy_process_name(&(cb->requestor), reqs[i]->requestor))) {
/* first, we need to determine if the data in this message
* is to be sent back through the trigger callback function
* or not. if it is, then we set the callback's message
* to point at the correct trigger id for that requestor
* so the message goes to the correct place, and we go ahead
* and store the data in the message
*/
if (NULL != trig && NULL != trig->master) {
/* define the callback */
if (ORTE_SUCCESS != (rc = orte_gpr_replica_define_callback(&cb, (trig->master)->requestor))) {
ORTE_ERROR_LOG(rc);
goto CLEANUP;
}
/* transfer the trigger name, if available */
if (NULL != trig->name) {
(cb->message)->name = strdup(trig->name);
}
/* set the callback id to point to the trigger callback function */
(cb->message)->id = (trig->master)->idtag;
/* cycle through all the subscription's requestors and place
* the data on the message so that the trigger master can distribute
* it as required
*/
reqs = (orte_gpr_replica_requestor_t**)(sub->requestors)->addr;
for (i=0, j=0; j < sub->num_requestors &&
i < (sub->requestors)->size; i++) {
if (NULL != reqs[i]) {
j++;
if (ORTE_SUCCESS != (rc = orte_gpr_replica_store_value_in_msg(reqs[i]->idtag,
cb->message, cnt, values))) {
ORTE_ERROR_LOG(rc);
goto CLEANUP;
}
}
opal_list_append(&orte_gpr_replica.callbacks, &cb->item);
/* construct the message */
cb->message = OBJ_NEW(orte_gpr_notify_message_t);
if (NULL == cb->message) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
rc = ORTE_ERR_OUT_OF_RESOURCE;
goto CLEANUP;
}
} else {
/* this data is intended to be sent to the individual
* subscribers themselves. Cycle through the subscription's
* requestors, define callbacks to them appropriately,
* and set the id to indicate that it does NOT go
* to a trigger
*/
reqs = (orte_gpr_replica_requestor_t**)(sub->requestors)->addr;
for (i=0, j=0; j < sub->num_requestors &&
i < (sub->requestors)->size; i++) {
if (NULL != reqs[i]) {
j++;
/* define the callback */
if (ORTE_SUCCESS != (rc = orte_gpr_replica_define_callback(&cb, reqs[i]->requestor))) {
ORTE_ERROR_LOG(rc);
goto CLEANUP;
}
/* set the callback id to indicate not a trigger callback */
(cb->message)->id = ORTE_GPR_TRIGGER_ID_MAX;
/* okay, now we have a message going to the requestor. We need to
* store the values in the notify_data structure corresponding to this
* subscription id, combining data where the id's match
*/
if (ORTE_SUCCESS != (rc = orte_gpr_replica_store_value_in_msg(reqs[i]->idtag,
cb->message, cnt, values))) {
ORTE_ERROR_LOG(rc);
goto CLEANUP;
}
}
PROCESS:
/* okay, now we have a message going to the requestor. We need to
* store the values in the notify_data structure corresponding to this
* subscription id, combining data where the id's match
*/
if (ORTE_SUCCESS != (rc = orte_gpr_replica_store_value_in_msg(reqs[i]->idtag,
cb->message, cnt, values))) {
ORTE_ERROR_LOG(rc);
goto CLEANUP;
}
} /* if NULL */
} /* for i */
} /* for i */
} /* if else */
CLEANUP:
if (cleanup_reqd) {
for (i=0; i < cnt; i++) OBJ_RELEASE(values[i]);
if (NULL != values) free(values);
}
/* release the values here - the value objects have been "retained" in
* the store_value function, so this just ensures that they will be
* released after the last datagram lets go of them
*/
for (i=0; i < cnt; i++) OBJ_RELEASE(values[i]);
/* release the values array IF and only IF it was malloc'd here.
* otherwise, the value is coming in from the outside - when that happens,
* only a single value is passed in, so there is no array to free
*/
if (cleanup_reqd) free(values);
return rc;
}
/**
 * Find the pending callback addressed to a recipient, creating one if needed.
 *
 * The list orte_gpr_replica.callbacks is searched for an entry whose
 * requestor matches the recipient: a NULL recipient matches an entry with a
 * NULL requestor, and a non-NULL recipient matches an entry for which
 * orte_ns.compare(ORTE_NS_CMP_ALL, ...) returns 0. When nothing matches, a
 * new callback carrying a fresh notify message is built and appended to the
 * list.
 *
 * @param cbptr     set to the matching or newly created callback on success;
 *                  left untouched on failure
 * @param recipient name of the destination process, or NULL
 * @return ORTE_SUCCESS, ORTE_ERR_OUT_OF_RESOURCE if an object could not be
 *         allocated, or the error code returned by orte_ns.copy_process_name
 */
int orte_gpr_replica_define_callback(orte_gpr_replica_callbacks_t **cbptr,
                                     orte_process_name_t *recipient)
{
    orte_gpr_replica_callbacks_t *cb;
    int rc;

    /* see if a callback has already been registered for this recipient */
    for (cb = (orte_gpr_replica_callbacks_t*)opal_list_get_first(&(orte_gpr_replica.callbacks));
         cb != (orte_gpr_replica_callbacks_t*)opal_list_get_end(&(orte_gpr_replica.callbacks));
         cb = (orte_gpr_replica_callbacks_t*)opal_list_get_next(cb)) {
        if ((NULL == recipient && NULL == cb->requestor) ||
            ((NULL != recipient && NULL != cb->requestor) &&
             (0 == orte_ns.compare(ORTE_NS_CMP_ALL,
                                   recipient,
                                   cb->requestor)))) {
            /* okay, a callback has been registered to send data to this
             * recipient - return this location
             */
            *cbptr = cb;
            return ORTE_SUCCESS;
        }
    }

    /* this is going to somebody new - create a new callback
     * for this recipient
     */
    cb = OBJ_NEW(orte_gpr_replica_callbacks_t);
    if (NULL == cb) {
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        return ORTE_ERR_OUT_OF_RESOURCE;
    }

    /* construct the message */
    cb->message = OBJ_NEW(orte_gpr_notify_message_t);
    if (NULL == cb->message) {
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        /* NOTE(review): releasing a partially built callback assumes its
         * destructor tolerates a NULL message/requestor - confirm
         */
        OBJ_RELEASE(cb);
        return ORTE_ERR_OUT_OF_RESOURCE;
    }

    if (NULL == recipient) {
        cb->requestor = NULL;
    } else if (ORTE_SUCCESS != (rc = orte_ns.copy_process_name(&(cb->requestor), recipient))) {
        ORTE_ERROR_LOG(rc);
        OBJ_RELEASE(cb);
        return rc;
    }

    /* queue the callback only once it is complete: an entry that lacked its
     * message or requestor must never be visible to whoever walks the
     * callback list later
     */
    opal_list_append(&orte_gpr_replica.callbacks, &cb->item);

    /* return the pointer to the new callback */
    *cbptr = cb;
    return ORTE_SUCCESS;
}
int orte_gpr_replica_store_value_in_msg(orte_gpr_subscription_id_t id,
orte_gpr_notify_message_t *msg,
size_t cnt,
orte_gpr_value_t **values)
{
size_t j, k, n, index;
int rc;
orte_gpr_value_t **vals;
orte_gpr_keyval_t **kptr;
size_t i, j, k, index;
orte_gpr_notify_data_t **data, *dptr;
/* find the datagram corresponding to the provided subscription id */
if (NULL == msg->data) { /* first datagram on message */
msg->data = (orte_gpr_notify_data_t**)malloc(sizeof(orte_gpr_notify_data_t*));
if (NULL == msg->data) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
index = 0; /* need to assign location */
msg->cnt = 1;
} else {
/* check to see if this data is going to the same callback as
* any prior data on the message. if so, then we add the values
* to that existing data structure. if not, then we realloc to
* establish a new data structure and store the data there
*/
for (k=0; k < msg->cnt; k++) {
if (msg->data[k]->id == id) { /* going to the same place */
if (ORTE_SUCCESS != (rc =
orte_gpr_replica_add_value_to_datagram(
msg->data[k], cnt, values))) {
ORTE_ERROR_LOG(rc);
/* check to see if this data is going to the same place as
* any prior data on the message. if so, then we add the values
* to that existing data structure. if not, then we realloc to
* establish a new data structure and store the data there
*/
data = (orte_gpr_notify_data_t**)(msg->data)->addr;
for (i=0, k=0; k < msg->cnt &&
i < (msg->data)->size; i++) {
if (NULL != data[i]) {
k++;
if (data[i]->id == id) { /* going to the same place */
for (j=0; j < cnt; j++) {
if (0 > orte_pointer_array_add(&index, data[i]->values, values[j])) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
}
return rc;
/* must "retain" the value object to ensure that it is
* there for this datagram. Since we are only storing
* pointers to the object (and not actually copying it),
* datagrams may wind up sharing the object. Hence, when
* a datagram is released, it will release the object. Without
* the retain, the next datagram that shares that object
* will see trash
*/
OBJ_RETAIN(values[j]);
data[i]->cnt += cnt;
return ORTE_SUCCESS;
}
}
/* no prior matching data found, so add another data location to the message */
msg->data = (orte_gpr_notify_data_t **) realloc(msg->data, (msg->cnt + 1)*sizeof(orte_gpr_notify_data_t*));
if (NULL == msg->data) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
index = msg->cnt;
(msg->cnt)++;
}
msg->data[index] = OBJ_NEW(orte_gpr_notify_data_t);
if (NULL == msg->data[index]) {
/* no prior matching data found, so add another data location to
* the message and store the values there
*/
dptr = OBJ_NEW(orte_gpr_notify_data_t);
if (NULL == dptr) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
/* store the callback id */
msg->data[index]->id = id;
/* since this datagram is new, allocate the required data locations */
msg->data[index]->cnt = cnt;
if (0 == cnt) { /* no data to attach */
return ORTE_SUCCESS;
}
msg->data[index]->values = (orte_gpr_value_t**)malloc(cnt * sizeof(orte_gpr_value_t*));
if (NULL == msg->data[index]->values) {
dptr->id = id;
if (0 > orte_pointer_array_add(&index, msg->data, dptr)) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
/* transfer the values to the datagram */
vals = msg->data[index]->values;
(msg->cnt)++;
for (j=0; j < cnt; j++) {
vals[j] = OBJ_NEW(orte_gpr_value_t);
if (NULL == vals[j]) {
if (0 > orte_pointer_array_add(&index, dptr->values, values[j])) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
/* record the addressing mode */
vals[j]->addr_mode = values[j]->addr_mode;
/* record the segment these values came from */
vals[j]->segment = strdup(values[j]->segment);
if (NULL == (vals[j]->segment)) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
/* record the tokens describing the container */
vals[j]->num_tokens = values[j]->num_tokens;
if (0 == values[j]->num_tokens) {
/* this is an illegal case - the tokens here describe
* the container from which this data was obtained. The
* container MUST have tokens that describe it
*/
ORTE_ERROR_LOG(ORTE_ERR_GPR_DATA_CORRUPT);
return ORTE_ERR_GPR_DATA_CORRUPT;
}
vals[j]->tokens = (char **)malloc(values[j]->num_tokens *
sizeof(char*));
if (NULL == vals[j]->tokens) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
for (n=0; n < values[j]->num_tokens; n++) {
vals[j]->tokens[n] = strdup(values[j]->tokens[n]);
if (NULL == vals[j]->tokens[n]) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
}
/* record the keyvals */
vals[j]->keyvals = (orte_gpr_keyval_t**)malloc(values[j]->cnt *
sizeof(orte_gpr_keyval_t*));
if (NULL == vals[j]->keyvals) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
vals[j]->cnt = values[j]->cnt;
kptr = vals[j]->keyvals;
for (n=0; n < values[j]->cnt; n++) {
kptr[n] = OBJ_NEW(orte_gpr_keyval_t);
if (NULL == kptr[n]) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
kptr[n]->key = strdup((values[j]->keyvals[n])->key);
if (NULL == kptr[n]->key) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
kptr[n]->type = (values[j]->keyvals[n])->type;
if (ORTE_SUCCESS != (rc = orte_gpr_base_xfer_payload(
&(kptr[n]->value), &((values[j]->keyvals[n])->value),
(values[j]->keyvals[n])->type))) {
ORTE_ERROR_LOG(rc);
return rc;
}
}
}
return ORTE_SUCCESS;
}
static int orte_gpr_replica_add_value_to_datagram(orte_gpr_notify_data_t *data,
size_t cnt, orte_gpr_value_t **values)
{
size_t i, j, k, n, m, index, matches, num_tokens;
int rc;
orte_gpr_value_t *value;
orte_gpr_keyval_t **kptr;
for (i=0; i < cnt; i++) {
value = values[i];
/* check to see if this value is from the same container
* as some prior one. if so, then we add those itagvals
* to the existing value structure. if not, then we realloc to
* establish a new value structure and store the data there
/* must "retain" the value object to ensure that it is
* there for this datagram. Since we are only storing
* pointers to the object (and not actually copying it),
* datagrams may wind up sharing the object. Hence, when
* a datagram is released, it will release the object. Without
* the retain, the next datagram that shares that object
* will see trash
*/
for (k=0; k < data->cnt; k++) {
matches = 0;
num_tokens = data->values[k]->num_tokens;
if (num_tokens == value->num_tokens) { /* must have same number or can't match */
for (j=0; j < num_tokens; j++) {
for (m=0; m < num_tokens; m++) {
if (0 == strcmp((data->values[k])->tokens[j], value->tokens[m])) {
matches++;
}
}
if (num_tokens == matches) { /* from same container - just add keyvals to it */
index = k;
goto ADDKVALS;
}
}
}
}
/* no prior matching data found, so add another value structure to the object */
data->values = (orte_gpr_value_t**)realloc(data->values, (data->cnt + 1)*sizeof(orte_gpr_value_t*));
if (NULL == data->values) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
index = data->cnt;
(data->cnt)++;
data->values[index] = OBJ_NEW(orte_gpr_value_t);
if (NULL == data->values[index]) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
/* record the addressing mode */
data->values[index]->addr_mode = value->addr_mode;
/* record the segment these values came from */
data->values[index]->segment = strdup(value->segment);
if (NULL == data->values[index]->segment) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
/* record the tokens describing the container */
data->values[index]->num_tokens = value->num_tokens;
if (0 == value->num_tokens) {
/* this is an illegal case - the tokens here describe
* the container from which this data was obtained. The
* container MUST have tokens that describe it
*/
ORTE_ERROR_LOG(ORTE_ERR_GPR_DATA_CORRUPT);
return ORTE_ERR_GPR_DATA_CORRUPT;
}
data->values[index]->tokens = (char **)malloc(value->num_tokens * sizeof(char*));
if (NULL == data->values[index]->tokens) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
for (n=0; n < value->num_tokens; n++) {
data->values[index]->tokens[n] = strdup(value->tokens[n]);
if (NULL == data->values[index]->tokens[n]) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
}
ADDKVALS:
/* transfer the data in the value to be returned */
if (0 < data->values[index]->cnt) { /* already have some data here, so add to the space */
n = data->values[index]->cnt + value->cnt;
data->values[index]->keyvals = (orte_gpr_keyval_t**)
realloc(data->values[index]->keyvals, n * sizeof(orte_gpr_keyval_t*));
if (NULL == data->values[index]->keyvals) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
kptr = &(data->values[index]->keyvals[data->values[index]->cnt]);
data->values[index]->cnt = n;
} else {
data->values[index]->keyvals = (orte_gpr_keyval_t**)malloc(value->cnt * sizeof(orte_gpr_keyval_t*));
if (NULL == data->values[index]->keyvals) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
data->values[index]->cnt = value->cnt;
kptr = data->values[index]->keyvals;
}
for (n=0; n < value->cnt; n++) {
kptr[n] = OBJ_NEW(orte_gpr_keyval_t);
if (NULL == kptr[n]) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
kptr[n]->key = strdup((value->keyvals[n])->key);
if (NULL == kptr[n]->key) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
kptr[n]->type = (value->keyvals[n])->type;
if (ORTE_SUCCESS != (rc = orte_gpr_base_xfer_payload(
&(kptr[n]->value), &((value->keyvals[n])->value),
(value->keyvals[n])->type))) {
ORTE_ERROR_LOG(rc);
return rc;
}
}
OBJ_RETAIN(values[j]);
}
dptr->cnt = cnt;
return ORTE_SUCCESS;
}

Просмотреть файл

@ -39,9 +39,10 @@ int orte_gpr_replica_subscribe_fn(orte_process_name_t *requestor,
size_t num_trigs,
orte_gpr_trigger_t **trigs)
{
orte_gpr_replica_subscription_t *sub=NULL, **subs;
orte_gpr_replica_subscription_t *sub=NULL, **subs, **trigsubs;
orte_gpr_replica_trigger_t *trig=NULL;
size_t i, j, k, index;
size_t i, j, k, m, n, index;
bool ignore;
int rc=ORTE_SUCCESS;
if (orte_gpr_replica_globals.debug) {
@ -78,19 +79,37 @@ int orte_gpr_replica_subscribe_fn(orte_process_name_t *requestor,
ORTE_ERROR_LOG(rc);
return rc;
}
/* link the subscriptions to the new trigger
/* link the subscriptions to the new trigger. only do this if the
* subscription doesn't already exist on this trigger - otherwise,
* we'd just be duplicating things.
*/
trigsubs = (orte_gpr_replica_subscription_t**)(trig->subscriptions)->addr;
for (j=0, k=0; k < num_subs &&
j < (orte_gpr_replica_globals.sub_ptrs)->size; j++) {
if (NULL != subs[j]) {
k++;
if (0 > orte_pointer_array_add(&index, trig->subscriptions, subs[j])) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
/* check to see if this subscription is already attached
* to this trigger - if not, add it
*/
ignore = false;
for (m=0, n=0; n < trig->num_subscriptions &&
m < (trig->subscriptions)->size; m++) {
if (NULL != trigsubs[m]) {
n++;
if (subs[j] == trigsubs[m]) { /* already present */
ignore = true;
}
}
}
if (!ignore) { /* new sub for this trig - add it */
if (0 > orte_pointer_array_add(&index, trig->subscriptions, subs[j])) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
(trig->num_subscriptions)++;
}
}
}
trig->num_subscriptions += num_subs;
}
return rc;

Просмотреть файл

@ -33,54 +33,6 @@
#include "gpr_replica_fn.h"
/* FUNCTIONS REQUIRED FOR LOCAL SUBSCRIPTION AND TRIGGER
* REGISTRATION
*/
/**
 * Record subscriptions whose callbacks are delivered locally.
 *
 * For every entry in the input array a local subscriber object is created,
 * given the entry's callback function and user tag, and stored in
 * orte_gpr_replica_globals.local_subscriptions. The index at which the
 * object was stored becomes the subscription id; it is written both to the
 * subscriber object and back into subscriptions[i]->id, and the count of
 * local subscriptions is incremented.
 *
 * @param cnt            number of entries in subscriptions
 * @param subscriptions  array of cnt subscription descriptions; the id field
 *                       of each successfully recorded entry is filled in
 * @return ORTE_SUCCESS, or ORTE_ERR_OUT_OF_RESOURCE if an object could not
 *         be allocated or stored. Entries recorded before the failure remain
 *         recorded.
 */
int
orte_gpr_replica_enter_local_subscription(size_t cnt, orte_gpr_subscription_t **subscriptions)
{
    orte_gpr_replica_local_subscriber_t *sub;
    size_t i, id;

    for (i=0; i < cnt; i++) {
        sub = OBJ_NEW(orte_gpr_replica_local_subscriber_t);
        if (NULL == sub) {
            ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
            return ORTE_ERR_OUT_OF_RESOURCE;
        }
        sub->callback = subscriptions[i]->cbfunc;
        sub->user_tag = subscriptions[i]->user_tag;
        if (0 > orte_pointer_array_add(&id, orte_gpr_replica_globals.local_subscriptions, sub)) {
            ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
            /* the array never took ownership, so drop our reference here
             * instead of leaking the subscriber object
             */
            OBJ_RELEASE(sub);
            return ORTE_ERR_OUT_OF_RESOURCE;
        }
        /* the storage index doubles as the subscription id */
        sub->id = (orte_gpr_subscription_id_t)id;
        subscriptions[i]->id = sub->id;
        (orte_gpr_replica_globals.num_local_subs)++;
    }

    return ORTE_SUCCESS;
}
/**
 * Hand out trigger ids to a batch of locally registered triggers.
 *
 * Each trigger receives the current value of
 * orte_gpr_replica_globals.trig_cntr, after which the counter advances by
 * one. Assignment stops with an error as soon as the counter is no longer
 * below ORTE_GPR_TRIGGER_ID_MAX-1; triggers handled before that point keep
 * the ids they were given.
 *
 * @param cnt    number of entries in trigs
 * @param trigs  array of cnt triggers whose id field is filled in
 * @return ORTE_SUCCESS, or ORTE_ERR_OUT_OF_RESOURCE when the id space is
 *         exhausted
 */
int
orte_gpr_replica_enter_local_trigger(size_t cnt, orte_gpr_trigger_t **trigs)
{
    size_t n;

    for (n = 0; n < cnt; ++n) {
        /* bail out once the counter has reached the last usable id */
        if (ORTE_GPR_TRIGGER_ID_MAX-1 <= orte_gpr_replica_globals.trig_cntr) {
            ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
            return ORTE_ERR_OUT_OF_RESOURCE;
        }

        trigs[n]->id = orte_gpr_replica_globals.trig_cntr;
        orte_gpr_replica_globals.trig_cntr += 1;
    }

    return ORTE_SUCCESS;
}
/*
* GENERAL REGISTRY TRIGGER FUNCTIONS
*/
@ -511,6 +463,22 @@ ADDREQ:
*/
req->idtag = trigger->id;
/* see if the ORTE_GPR_TRIG_ROUTE_DATA_THRU_ME flag is set. This
* indicates that the requestor wants all data sent to them and
* is assuming all responsibility for properly routing
* the data
*/
if (ORTE_GPR_TRIG_ROUTE_DATA_THRU_ME & trig->action) {
if (NULL != trig->master) {
/* someone already requested this responsibility.
* this is an error - report it
*/
ORTE_ERROR_LOG(ORTE_ERR_NOT_AVAILABLE);
} else {
trig->master = req;
}
}
/* report the location of this trigger */
*trigptr = trig;
@ -912,9 +880,6 @@ int orte_gpr_replica_check_trig(orte_gpr_replica_trigger_t *trig)
}
}
if (fire) { /* all levels were equal */
if (orte_gpr_replica_globals.debug) {
opal_output(0, "REGISTERING CALLBACK FOR TRIG %d", trig->index);
}
goto FIRED;
}
return ORTE_SUCCESS;
@ -955,7 +920,7 @@ FIRED:
i < (trig->subscriptions)->size; i++) {
if (NULL != subs[i]) {
j++;
if (ORTE_SUCCESS != (rc = orte_gpr_replica_register_callback(subs[i], NULL))) {
if (ORTE_SUCCESS != (rc = orte_gpr_replica_register_callback(trig, subs[i], NULL))) {
ORTE_ERROR_LOG(rc);
return rc;
}
@ -992,24 +957,28 @@ int orte_gpr_replica_check_subscription(orte_gpr_replica_subscription_t *sub)
{
orte_gpr_replica_action_taken_t **ptr;
size_t i, j, k;
orte_gpr_value_t value;
orte_gpr_value_t *value;
int rc=ORTE_SUCCESS;
/* Construct the base structure for returned data so it can be
* sent to the user, if required
*/
OBJ_CONSTRUCT(&value, orte_gpr_value_t);
value.cnt = 1;
value.keyvals = (orte_gpr_keyval_t**)malloc(sizeof(orte_gpr_keyval_t*));
if (NULL == value.keyvals) {
value = OBJ_NEW(orte_gpr_value_t);
if (NULL == value) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
OBJ_DESTRUCT(&value);
return ORTE_ERR_OUT_OF_RESOURCE;
}
value.keyvals[0] = OBJ_NEW(orte_gpr_keyval_t);
if (NULL == value.keyvals[0]) {
value->cnt = 1;
value->keyvals = (orte_gpr_keyval_t**)malloc(sizeof(orte_gpr_keyval_t*));
if (NULL == value->keyvals) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
OBJ_DESTRUCT(&value);
OBJ_RELEASE(value);
return ORTE_ERR_OUT_OF_RESOURCE;
}
value->keyvals[0] = OBJ_NEW(orte_gpr_keyval_t);
if (NULL == value->keyvals[0]) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
OBJ_RELEASE(value);
return ORTE_ERR_OUT_OF_RESOURCE;
}
@ -1042,7 +1011,7 @@ int orte_gpr_replica_check_subscription(orte_gpr_replica_subscription_t *sub)
((sub->action & ORTE_GPR_NOTIFY_VALUE_CHG) &&
(ptr[i]->action & ORTE_GPR_REPLICA_ENTRY_CHANGED)))
&& orte_gpr_replica_check_notify_matches(&value, sub, ptr[i])) {
&& orte_gpr_replica_check_notify_matches(value, sub, ptr[i])) {
/* if the notify matched one of the subscription values,
* then the address mode will have
@ -1050,16 +1019,16 @@ int orte_gpr_replica_check_subscription(orte_gpr_replica_subscription_t *sub)
* the segment name and tokens from the container that is
* being addressed!
*/
value.segment = strdup(ptr[i]->seg->name);
value.num_tokens = ptr[i]->cptr->num_itags;
value.tokens = (char **)malloc(value.num_tokens * sizeof(char*));
if (NULL == value.tokens) {
value->segment = strdup(ptr[i]->seg->name);
value->num_tokens = ptr[i]->cptr->num_itags;
value->tokens = (char **)malloc(value->num_tokens * sizeof(char*));
if (NULL == value->tokens) {
rc = ORTE_ERR_OUT_OF_RESOURCE;
goto CLEANUP;
}
for (j=0; j < value.num_tokens; j++) {
for (j=0; j < value->num_tokens; j++) {
if (ORTE_SUCCESS != (rc = orte_gpr_replica_dict_reverse_lookup(
&(value.tokens[j]),
&(value->tokens[j]),
ptr[i]->seg,
ptr[i]->cptr->itags[j]))) {
ORTE_ERROR_LOG(rc);
@ -1068,20 +1037,20 @@ int orte_gpr_replica_check_subscription(orte_gpr_replica_subscription_t *sub)
}
/* send back the recorded data */
if (ORTE_SUCCESS != (rc = orte_gpr_replica_dict_reverse_lookup(
&((value.keyvals[0])->key), ptr[i]->seg,
&((value->keyvals[0])->key), ptr[i]->seg,
ptr[i]->iptr->itag))) {
ORTE_ERROR_LOG(rc);
goto CLEANUP;
}
(value.keyvals[0])->type = ptr[i]->iptr->type;
(value->keyvals[0])->type = ptr[i]->iptr->type;
if (ORTE_SUCCESS != (rc = orte_gpr_base_xfer_payload(
&((value.keyvals[0])->value), &(ptr[i]->iptr->value),
&((value->keyvals[0])->value), &(ptr[i]->iptr->value),
ptr[i]->iptr->type))) {
ORTE_ERROR_LOG(rc);
goto CLEANUP;
}
if (ORTE_SUCCESS != (rc =
orte_gpr_replica_register_callback(sub, &value))) {
orte_gpr_replica_register_callback(NULL, sub, value))) {
ORTE_ERROR_LOG(rc);
goto CLEANUP;
}
@ -1090,7 +1059,6 @@ int orte_gpr_replica_check_subscription(orte_gpr_replica_subscription_t *sub)
}
CLEANUP:
OBJ_DESTRUCT(&value);
return rc;
}

Просмотреть файл

@ -45,13 +45,6 @@ extern "C" {
* typedefs needed in replica component
*/
/* JMS: This is only INT_MAX until bug 1345 is fixed, because this
value is used to set an MCA parameter, which can [currently] only
take an int. */
#define ORTE_GPR_REPLICA_MAX_SIZE INT_MAX
#define ORTE_GPR_REPLICA_BLOCK_SIZE 100
typedef size_t orte_gpr_replica_itag_t;
#define ORTE_GPR_REPLICA_ITAG_MAX SIZE_MAX
@ -91,15 +84,28 @@ typedef struct {
OBJ_CLASS_DECLARATION(orte_gpr_replica_local_subscriber_t);
/*
* Local trigger tracker for use by processes
* that are operating on the same node as the replica.
*
* One of these objects records where a trigger notification should be
* delivered inside this process: the trigger id it answers to, the
* function to invoke, and the opaque tag handed back to that function.
*/
typedef struct {
opal_object_t super; /**< Allows this to be an object */
orte_gpr_trigger_id_t id; /**< id of this trigger; compared against the id carried by a notify message */
orte_gpr_trigger_cb_fn_t callback; /**< Function to be called for notification; receives the message and user_tag */
void *user_tag; /**< User-provided tag passed through to the callback function */
} orte_gpr_replica_local_trigger_t;
OBJ_CLASS_DECLARATION(orte_gpr_replica_local_trigger_t);
typedef struct {
int debug;
int isolate;
size_t block_size;
size_t max_size;
opal_mutex_t mutex;
size_t num_local_subs;
orte_pointer_array_t *local_subscriptions;
size_t trig_cntr;
size_t num_local_trigs;
orte_pointer_array_t *local_triggers;
size_t num_srch_cptr;
orte_pointer_array_t *srch_cptr;
size_t num_overwritten;
@ -315,6 +321,11 @@ struct orte_gpr_replica_trigger_t {
/* array of requestors that have "attached" themselves to this trigger */
size_t num_attached;
orte_pointer_array_t *attached;
/* the "master" requestor - if someone asks to have all
* output routed through them, we record their info here
* so we can comply
*/
orte_gpr_replica_trigger_requestor_t *master;
/* the action that causes the trigger to be fired */
orte_gpr_notify_action_t action;
/* flag that indicates this trigger is a one-shot, has fired and

Просмотреть файл

@ -43,6 +43,16 @@ OBJ_CLASS_INSTANCE(
NULL); /* destructor */
/* LOCAL_TRIGGER */
/* Class instance for orte_gpr_replica_local_trigger_t, derived from
* opal_object_t. No constructor or destructor is needed, so both
* hooks are passed as NULL and the instance is simply defined. */
OBJ_CLASS_INSTANCE(
orte_gpr_replica_local_trigger_t, /* type name */
opal_object_t, /* parent "class" name */
NULL, /* constructor */
NULL); /* destructor */
/* SEGMENT */
/* constructor - used to initialize state of segment instance */
static void orte_gpr_replica_segment_construct(orte_gpr_replica_segment_t* seg)
@ -51,14 +61,14 @@ static void orte_gpr_replica_segment_construct(orte_gpr_replica_segment_t* seg)
seg->itag = ORTE_GPR_REPLICA_ITAG_MAX;
seg->num_dict_entries = 0;
orte_pointer_array_init(&(seg->dict), orte_gpr_replica_globals.block_size,
orte_gpr_replica_globals.max_size,
orte_gpr_replica_globals.block_size);
orte_pointer_array_init(&(seg->dict), orte_gpr_array_block_size,
orte_gpr_array_max_size,
orte_gpr_array_block_size);
seg->num_containers = 0;
orte_pointer_array_init(&(seg->containers), orte_gpr_replica_globals.block_size,
orte_gpr_replica_globals.max_size,
orte_gpr_replica_globals.block_size);
orte_pointer_array_init(&(seg->containers), orte_gpr_array_block_size,
orte_gpr_array_max_size,
orte_gpr_array_block_size);
}
@ -117,9 +127,9 @@ static void orte_gpr_replica_container_construct(orte_gpr_replica_container_t* r
reg->itags = NULL;
reg->num_itags = 0;
orte_pointer_array_init(&(reg->itagvals), orte_gpr_replica_globals.block_size,
orte_gpr_replica_globals.max_size,
orte_gpr_replica_globals.block_size);
orte_pointer_array_init(&(reg->itagvals), orte_gpr_array_block_size,
orte_gpr_array_max_size,
orte_gpr_array_block_size);
reg->num_itagvals = 0;
OBJ_CONSTRUCT(&(reg->itaglist), orte_value_array_t);
@ -282,14 +292,14 @@ static void orte_gpr_replica_subscription_construct(orte_gpr_replica_subscriptio
sub->action = ORTE_GPR_REPLICA_NO_ACTION;
sub->num_values = 0;
orte_pointer_array_init(&(sub->values), orte_gpr_replica_globals.block_size,
orte_gpr_replica_globals.max_size,
orte_gpr_replica_globals.block_size);
orte_pointer_array_init(&(sub->values), orte_gpr_array_block_size,
orte_gpr_array_max_size,
orte_gpr_array_block_size);
sub->num_requestors = 0;
orte_pointer_array_init(&(sub->requestors), orte_gpr_replica_globals.block_size,
orte_gpr_replica_globals.max_size,
orte_gpr_replica_globals.block_size);
orte_pointer_array_init(&(sub->requestors), orte_gpr_array_block_size,
orte_gpr_array_max_size,
orte_gpr_array_block_size);
}
/* destructor - used to free any resources held by instance */
@ -365,22 +375,24 @@ static void orte_gpr_replica_trigger_construct(orte_gpr_replica_trigger_t* trig)
trig->index = 0;
trig->num_attached = 0;
orte_pointer_array_init(&(trig->attached), orte_gpr_replica_globals.block_size,
orte_gpr_replica_globals.max_size,
orte_gpr_replica_globals.block_size);
orte_pointer_array_init(&(trig->attached), orte_gpr_array_block_size,
orte_gpr_array_max_size,
orte_gpr_array_block_size);
trig->master = NULL;;
trig->action = ORTE_GPR_REPLICA_NO_ACTION;
trig->one_shot_fired = false;
trig->num_counters = 0;
orte_pointer_array_init(&(trig->counters), orte_gpr_replica_globals.block_size,
orte_gpr_replica_globals.max_size,
orte_gpr_replica_globals.block_size);
orte_pointer_array_init(&(trig->counters), orte_gpr_array_block_size,
orte_gpr_array_max_size,
orte_gpr_array_block_size);
trig->num_subscriptions = 0;
orte_pointer_array_init(&(trig->subscriptions), orte_gpr_replica_globals.block_size,
orte_gpr_replica_globals.max_size,
orte_gpr_replica_globals.block_size);
orte_pointer_array_init(&(trig->subscriptions), orte_gpr_array_block_size,
orte_gpr_array_max_size,
orte_gpr_array_block_size);
}

Просмотреть файл

@ -133,16 +133,6 @@ int orte_gpr_replica_open(void)
orte_gpr_replica_globals.debug = false;
}
id = mca_base_param_register_int("gpr", "replica", "maxsize", NULL,
ORTE_GPR_REPLICA_MAX_SIZE);
mca_base_param_lookup_int(id, &tmp);
orte_gpr_replica_globals.max_size = (size_t)tmp;
id = mca_base_param_register_int("gpr", "replica", "blocksize", NULL,
ORTE_GPR_REPLICA_BLOCK_SIZE);
mca_base_param_lookup_int(id, &tmp);
orte_gpr_replica_globals.block_size = (size_t)tmp;
id = mca_base_param_register_int("gpr", "replica", "isolate", NULL, 0);
mca_base_param_lookup_int(id, &tmp);
if (tmp) {
@ -188,27 +178,27 @@ orte_gpr_base_module_t *orte_gpr_replica_init(bool *allow_multi_user_threads, bo
/* initialize the registry head */
if (ORTE_SUCCESS != (rc = orte_pointer_array_init(&(orte_gpr_replica.segments),
orte_gpr_replica_globals.block_size,
orte_gpr_replica_globals.max_size,
orte_gpr_replica_globals.block_size))) {
orte_gpr_array_block_size,
orte_gpr_array_max_size,
orte_gpr_array_block_size))) {
ORTE_ERROR_LOG(rc);
return NULL;
}
orte_gpr_replica.num_segs = 0;
if (ORTE_SUCCESS != (rc = orte_pointer_array_init(&(orte_gpr_replica.triggers),
orte_gpr_replica_globals.block_size,
orte_gpr_replica_globals.max_size,
orte_gpr_replica_globals.block_size))) {
orte_gpr_array_block_size,
orte_gpr_array_max_size,
orte_gpr_array_block_size))) {
ORTE_ERROR_LOG(rc);
return NULL;
}
orte_gpr_replica.num_trigs = 0;
if (ORTE_SUCCESS != (rc = orte_pointer_array_init(&(orte_gpr_replica.subscriptions),
orte_gpr_replica_globals.block_size,
orte_gpr_replica_globals.max_size,
orte_gpr_replica_globals.block_size))) {
orte_gpr_array_block_size,
orte_gpr_array_max_size,
orte_gpr_array_block_size))) {
ORTE_ERROR_LOG(rc);
return NULL;
}
@ -221,45 +211,54 @@ orte_gpr_base_module_t *orte_gpr_replica_init(bool *allow_multi_user_threads, bo
/* initialize the local subscription and trigger trackers */
if (ORTE_SUCCESS != (rc = orte_pointer_array_init(
&(orte_gpr_replica_globals.local_subscriptions),
orte_gpr_replica_globals.block_size,
orte_gpr_replica_globals.max_size,
orte_gpr_replica_globals.block_size))) {
orte_gpr_array_block_size,
orte_gpr_array_max_size,
orte_gpr_array_block_size))) {
ORTE_ERROR_LOG(rc);
return NULL;
}
orte_gpr_replica_globals.num_local_subs = 0;
orte_gpr_replica_globals.trig_cntr = 0;
if (ORTE_SUCCESS != (rc = orte_pointer_array_init(
&(orte_gpr_replica_globals.local_triggers),
orte_gpr_array_block_size,
orte_gpr_array_max_size,
orte_gpr_array_block_size))) {
ORTE_ERROR_LOG(rc);
return NULL;
}
orte_gpr_replica_globals.num_local_trigs = 0;
/* initialize the search arrays for temporarily storing search results */
if (ORTE_SUCCESS != (rc = orte_pointer_array_init(&(orte_gpr_replica_globals.sub_ptrs),
100, orte_gpr_replica_globals.max_size, 100))) {
100, orte_gpr_array_max_size, 100))) {
ORTE_ERROR_LOG(rc);
return NULL;
}
if (ORTE_SUCCESS != (rc = orte_pointer_array_init(&(orte_gpr_replica_globals.srch_cptr),
100, orte_gpr_replica_globals.max_size, 100))) {
100, orte_gpr_array_max_size, 100))) {
ORTE_ERROR_LOG(rc);
return NULL;
}
orte_gpr_replica_globals.num_srch_cptr = 0;
if (ORTE_SUCCESS != (rc = orte_pointer_array_init(&(orte_gpr_replica_globals.overwritten),
20, orte_gpr_replica_globals.max_size, 20))) {
20, orte_gpr_array_max_size, 20))) {
ORTE_ERROR_LOG(rc);
return NULL;
}
orte_gpr_replica_globals.num_overwritten = 0;
if (ORTE_SUCCESS != (rc = orte_pointer_array_init(&(orte_gpr_replica_globals.srch_ival),
100, orte_gpr_replica_globals.max_size, 100))) {
100, orte_gpr_array_max_size, 100))) {
ORTE_ERROR_LOG(rc);
return NULL;
}
orte_gpr_replica_globals.num_srch_ival = 0;
if (ORTE_SUCCESS != (rc = orte_pointer_array_init(&(orte_gpr_replica_globals.acted_upon),
100, orte_gpr_replica_globals.max_size, 100))) {
100, orte_gpr_array_max_size, 100))) {
ORTE_ERROR_LOG(rc);
return NULL;
}

Просмотреть файл

@ -534,7 +534,13 @@ void mca_oob_tcp_registry_callback(
orte_gpr_notify_data_t* data,
void* cbdata)
{
size_t i;
size_t i, j, k;
orte_gpr_value_t **values, *value;
orte_gpr_keyval_t *keyval;
orte_buffer_t buffer;
mca_oob_tcp_addr_t* addr, *existing;
mca_oob_tcp_peer_t* peer;
if(mca_oob_tcp_component.tcp_debug > 1) {
opal_output(0, "[%lu,%lu,%lu] mca_oob_tcp_registry_callback\n",
ORTE_NAME_ARGS(orte_process_info.my_name));
@ -542,60 +548,60 @@ void mca_oob_tcp_registry_callback(
/* process the callback */
OPAL_THREAD_LOCK(&mca_oob_tcp_component.tcp_lock);
for(i = 0; i < data->cnt; i++) {
orte_gpr_value_t* value = data->values[i];
orte_buffer_t buffer;
mca_oob_tcp_addr_t* addr, *existing;
mca_oob_tcp_peer_t* peer;
size_t j;
for(j = 0; j < value->cnt; j++) {
/* check to make sure this is the requested key */
orte_gpr_keyval_t* keyval = value->keyvals[j];
if(strcmp(keyval->key,"oob-tcp") != 0)
continue;
/* transfer ownership of registry object to buffer and unpack */
OBJ_CONSTRUCT(&buffer, orte_buffer_t);
if(orte_dps.load(&buffer,
keyval->value.byteobject.bytes,
keyval->value.byteobject.size) != ORTE_SUCCESS) {
/* TSW - throw ERROR */
continue;
values = (orte_gpr_value_t**)(data->values)->addr;
for(i = 0, k=0; k < data->cnt &&
i < (data->values)->size; i++) {
if (NULL != values[i]) {
k++;
value = values[i];
for(j = 0; j < value->cnt; j++) {
/* check to make sure this is the requested key */
keyval = value->keyvals[j];
if(strcmp(keyval->key,"oob-tcp") != 0)
continue;
/* transfer ownership of registry object to buffer and unpack */
OBJ_CONSTRUCT(&buffer, orte_buffer_t);
if(orte_dps.load(&buffer,
keyval->value.byteobject.bytes,
keyval->value.byteobject.size) != ORTE_SUCCESS) {
/* TSW - throw ERROR */
continue;
}
keyval->type = ORTE_NULL;
keyval->value.byteobject.bytes = NULL;
keyval->value.byteobject.size = 0;
addr = mca_oob_tcp_addr_unpack(&buffer);
OBJ_DESTRUCT(&buffer);
if(NULL == addr) {
opal_output(0, "[%lu,%lu,%lu] mca_oob_tcp_registry_callback: unable to unpack peer address\n",
ORTE_NAME_ARGS(orte_process_info.my_name));
continue;
}
if(mca_oob_tcp_component.tcp_debug > 1) {
opal_output(0, "[%lu,%lu,%lu] mca_oob_tcp_registry_callback: received peer [%lu,%lu,%lu]\n",
ORTE_NAME_ARGS(orte_process_info.my_name),
ORTE_NAME_ARGS(&(addr->addr_name)));
}
/* check for existing cache entry */
existing = (mca_oob_tcp_addr_t *)opal_hash_table_get_proc(
&mca_oob_tcp_component.tcp_peer_names, &addr->addr_name);
if(NULL != existing) {
/* TSW - need to update existing entry */
OBJ_RELEASE(addr);
continue;
}
/* insert into cache and notify peer */
opal_hash_table_set_proc(&mca_oob_tcp_component.tcp_peer_names, &addr->addr_name, addr);
peer = (mca_oob_tcp_peer_t *)opal_hash_table_get_proc(
&mca_oob_tcp_component.tcp_peers, &addr->addr_name);
if(NULL != peer)
mca_oob_tcp_peer_resolved(peer, addr);
}
keyval->type = ORTE_NULL;
keyval->value.byteobject.bytes = NULL;
keyval->value.byteobject.size = 0;
addr = mca_oob_tcp_addr_unpack(&buffer);
OBJ_DESTRUCT(&buffer);
if(NULL == addr) {
opal_output(0, "[%lu,%lu,%lu] mca_oob_tcp_registry_callback: unable to unpack peer address\n",
ORTE_NAME_ARGS(orte_process_info.my_name));
continue;
}
if(mca_oob_tcp_component.tcp_debug > 1) {
opal_output(0, "[%lu,%lu,%lu] mca_oob_tcp_registry_callback: received peer [%lu,%lu,%lu]\n",
ORTE_NAME_ARGS(orte_process_info.my_name),
ORTE_NAME_ARGS(&(addr->addr_name)));
}
/* check for existing cache entry */
existing = (mca_oob_tcp_addr_t *)opal_hash_table_get_proc(
&mca_oob_tcp_component.tcp_peer_names, &addr->addr_name);
if(NULL != existing) {
/* TSW - need to update existing entry */
OBJ_RELEASE(addr);
continue;
}
/* insert into cache and notify peer */
opal_hash_table_set_proc(&mca_oob_tcp_component.tcp_peer_names, &addr->addr_name, addr);
peer = (mca_oob_tcp_peer_t *)opal_hash_table_get_proc(
&mca_oob_tcp_component.tcp_peers, &addr->addr_name);
if(NULL != peer)
mca_oob_tcp_peer_resolved(peer, addr);
}
}
OPAL_THREAD_UNLOCK(&mca_oob_tcp_component.tcp_lock);
@ -760,6 +766,7 @@ int mca_oob_tcp_init(void)
orte_gpr_subscription_t sub, *subs;
int rc;
opal_list_item_t* item;
char *tmp, *tmp2, *tmp3;
/* random delay to stagger connections back to seed */
#if defined(WIN32)
@ -930,8 +937,8 @@ int mca_oob_tcp_init(void)
return rc;
}
value->cnt = 1;
value->keyvals = (orte_gpr_keyval_t**)malloc(sizeof(orte_gpr_keyval_t*));
value->cnt = 2;
value->keyvals = (orte_gpr_keyval_t**)malloc(value->cnt * sizeof(orte_gpr_keyval_t*));
if(NULL == value->keyvals) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
@ -941,6 +948,11 @@ int mca_oob_tcp_init(void)
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
value->keyvals[1] = OBJ_NEW(orte_gpr_keyval_t);
if (NULL == value->keyvals[1]) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
if (ORTE_SUCCESS != (rc = orte_schema.get_proc_tokens(&(value->tokens),
&(value->num_tokens), orte_process_info.my_name))) {
ORTE_ERROR_LOG(rc);
@ -959,6 +971,15 @@ int mca_oob_tcp_init(void)
return rc;
}
(value->keyvals[1])->type = ORTE_STRING;
(value->keyvals[1])->key = strdup(ORTE_PROC_RML_IP_ADDRESS_KEY);
tmp = mca_oob.oob_get_addr();
tmp2 = strrchr(tmp, '/');
tmp3 = strrchr(tmp, ':');
*tmp3 = '\0';
(value->keyvals[1])->value.strptr = strdup(tmp2);
free(tmp);
if(mca_oob_tcp_component.tcp_debug > 2) {
opal_output(0, "[%lu,%lu,%lu] mca_oob_tcp_init: calling orte_gpr.put(%s)\n",
ORTE_NAME_ARGS(orte_process_info.my_name),

Просмотреть файл

@ -395,7 +395,7 @@ void orte_rmgr_base_proc_stage_gate_mgr(orte_gpr_notify_data_t *data,
orte_gpr_value_t **values;
orte_gpr_keyval_t **kvals;
orte_process_name_t *recipients;
size_t i, j, n=0;
size_t i, j, m, n=0;
orte_vpid_t k=0;
int rc;
bool found_slots=false, found_start=false;
@ -405,7 +405,7 @@ void orte_rmgr_base_proc_stage_gate_mgr(orte_gpr_notify_data_t *data,
/* check to see if this came from one of the stage gates as opposed
* to either terminate or finalize - if the latter, we ignore it
*/
values = data->values;
values = (orte_gpr_value_t**)(data->values)->addr;
kvals = values[0]->keyvals;
for (i=0; i < values[0]->cnt; i++) {
if (0 == strcmp(kvals[i]->key, ORTE_PROC_NUM_FINALIZED) ||
@ -419,7 +419,6 @@ void orte_rmgr_base_proc_stage_gate_mgr(orte_gpr_notify_data_t *data,
* to us. we use that value to extract the jobid for the returned
* data
*/
values = data->values;
if (ORTE_SUCCESS != (rc =
orte_schema.extract_jobid_from_segment_name(&job,
values[0]->segment))) {
@ -430,23 +429,27 @@ void orte_rmgr_base_proc_stage_gate_mgr(orte_gpr_notify_data_t *data,
/* value returned will contain the counter, which contains the number of
* procs in this job
*/
values = data->values;
for (i=0; i < data->cnt && (!found_slots || !found_start); i++) {
kvals = values[i]->keyvals;
/* check to see if ORTE_JOB_GLOBALS is the token */
if (NULL != values[i]->tokens &&
0 == strcmp(ORTE_JOB_GLOBALS, values[i]->tokens[0])) {
/* find the ORTE_JOB_SLOTS_KEY and the ORTE_JOB_VPID_START_KEY keyval */
for (j=0; j < values[i]->cnt && (!found_slots || !found_start); j++) {
if (NULL != kvals[j] && !found_slots &&
0 == strcmp(ORTE_JOB_SLOTS_KEY, kvals[j]->key)) {
n = kvals[j]->value.size;
found_slots = true;
}
if (NULL != kvals[j] && !found_start &&
0 == strcmp(ORTE_JOB_VPID_START_KEY, kvals[j]->key)) {
k = kvals[j]->value.vpid;
found_start = true;
for (i=0, m=0; m < data->cnt &&
i < (data->values)->size &&
(!found_slots || !found_start); i++) {
if (NULL != values[i]) {
m++;
kvals = values[i]->keyvals;
/* check to see if ORTE_JOB_GLOBALS is the token */
if (NULL != values[i]->tokens &&
0 == strcmp(ORTE_JOB_GLOBALS, values[i]->tokens[0])) {
/* find the ORTE_JOB_SLOTS_KEY and the ORTE_JOB_VPID_START_KEY keyval */
for (j=0; j < values[i]->cnt && (!found_slots || !found_start); j++) {
if (NULL != kvals[j] && !found_slots &&
0 == strcmp(ORTE_JOB_SLOTS_KEY, kvals[j]->key)) {
n = kvals[j]->value.size;
found_slots = true;
}
if (NULL != kvals[j] && !found_start &&
0 == strcmp(ORTE_JOB_VPID_START_KEY, kvals[j]->key)) {
k = kvals[j]->value.vpid;
found_start = true;
}
}
}
}
@ -505,7 +508,7 @@ void orte_rmgr_base_proc_stage_gate_mgr_abort(orte_gpr_notify_data_t *data,
* to us. we use that value to extract the jobid for the returned
* data
*/
values = data->values;
values = (orte_gpr_value_t**)(data->values)->addr;
if (ORTE_SUCCESS != (rc =
orte_schema.extract_jobid_from_segment_name(&job,
values[0]->segment))) {

Просмотреть файл

@ -235,17 +235,17 @@ static int orte_rmgr_proxy_terminate_proc(const orte_process_name_t* proc_name)
static void orte_rmgr_proxy_callback(orte_gpr_notify_data_t *data, void *cbdata)
{
orte_rmgr_cb_fn_t cbfunc = (orte_rmgr_cb_fn_t)cbdata;
orte_gpr_value_t **values;
orte_gpr_value_t **values, *value;
orte_gpr_keyval_t** keyvals;
orte_jobid_t jobid;
size_t i, j;
size_t i, j, k;
int rc;
/* we made sure in the subscriptions that at least one
* value is always returned
* get the jobid from the segment name in the first value
*/
values = data->values;
values = (orte_gpr_value_t**)(data->values)->addr;
if (ORTE_SUCCESS != (rc =
orte_schema.extract_jobid_from_segment_name(&jobid,
values[0]->segment))) {
@ -253,35 +253,39 @@ static void orte_rmgr_proxy_callback(orte_gpr_notify_data_t *data, void *cbdata)
return;
}
/* determine the state change */
for(i=0; i<data->cnt; i++) {
orte_gpr_value_t* value = data->values[i];
keyvals = value->keyvals;
for(j=0; j<value->cnt; j++) {
orte_gpr_keyval_t* keyval = keyvals[j];
if(strcmp(keyval->key, ORTE_PROC_NUM_AT_STG1) == 0) {
(*cbfunc)(jobid,ORTE_PROC_STATE_AT_STG1);
continue;
}
if(strcmp(keyval->key, ORTE_PROC_NUM_AT_STG2) == 0) {
(*cbfunc)(jobid,ORTE_PROC_STATE_AT_STG1);
continue;
}
if(strcmp(keyval->key, ORTE_PROC_NUM_AT_STG3) == 0) {
(*cbfunc)(jobid,ORTE_PROC_STATE_AT_STG3);
continue;
}
if(strcmp(keyval->key, ORTE_PROC_NUM_FINALIZED) == 0) {
(*cbfunc)(jobid,ORTE_PROC_STATE_FINALIZED);
continue;
}
if(strcmp(keyval->key, ORTE_PROC_NUM_TERMINATED) == 0) {
(*cbfunc)(jobid,ORTE_PROC_STATE_TERMINATED);
continue;
}
if(strcmp(keyval->key, ORTE_PROC_NUM_ABORTED) == 0) {
(*cbfunc)(jobid,ORTE_PROC_STATE_ABORTED);
continue;
for(i = 0, k=0; k < data->cnt &&
i < (data->values)->size; i++) {
if (NULL != values[i]) {
k++;
value = values[i];
/* determine the state change */
keyvals = value->keyvals;
for(j=0; j<value->cnt; j++) {
orte_gpr_keyval_t* keyval = keyvals[j];
if(strcmp(keyval->key, ORTE_PROC_NUM_AT_STG1) == 0) {
(*cbfunc)(jobid,ORTE_PROC_STATE_AT_STG1);
continue;
}
if(strcmp(keyval->key, ORTE_PROC_NUM_AT_STG2) == 0) {
    /* the stage-2 counter maps to the stage-2 state, in line with the
     * STG1 and STG3 branches around it (this branch previously
     * reported ORTE_PROC_STATE_AT_STG1) */
    (*cbfunc)(jobid,ORTE_PROC_STATE_AT_STG2);
    continue;
}
if(strcmp(keyval->key, ORTE_PROC_NUM_AT_STG3) == 0) {
(*cbfunc)(jobid,ORTE_PROC_STATE_AT_STG3);
continue;
}
if(strcmp(keyval->key, ORTE_PROC_NUM_FINALIZED) == 0) {
(*cbfunc)(jobid,ORTE_PROC_STATE_FINALIZED);
continue;
}
if(strcmp(keyval->key, ORTE_PROC_NUM_TERMINATED) == 0) {
(*cbfunc)(jobid,ORTE_PROC_STATE_TERMINATED);
continue;
}
if(strcmp(keyval->key, ORTE_PROC_NUM_ABORTED) == 0) {
(*cbfunc)(jobid,ORTE_PROC_STATE_ABORTED);
continue;
}
}
}
}

Просмотреть файл

@ -182,17 +182,17 @@ static void orte_rmgr_urm_wireup_stdin(orte_jobid_t jobid)
static void orte_rmgr_urm_callback(orte_gpr_notify_data_t *data, void *cbdata)
{
orte_rmgr_cb_fn_t cbfunc = (orte_rmgr_cb_fn_t)cbdata;
orte_gpr_value_t **values;
orte_gpr_value_t **values, *value;
orte_gpr_keyval_t** keyvals;
orte_jobid_t jobid;
size_t i, j;
size_t i, j, k;
int rc;
/* we made sure in the subscriptions that at least one
* value is always returned
* get the jobid from the segment name in the first value
*/
values = data->values;
values = (orte_gpr_value_t**)(data->values)->addr;
if (ORTE_SUCCESS != (rc =
orte_schema.extract_jobid_from_segment_name(&jobid,
values[0]->segment))) {
@ -200,38 +200,42 @@ static void orte_rmgr_urm_callback(orte_gpr_notify_data_t *data, void *cbdata)
return;
}
/* determine the state change */
for(i=0; i<data->cnt; i++) {
orte_gpr_value_t* value = data->values[i];
keyvals = value->keyvals;
for(j=0; j<value->cnt; j++) {
orte_gpr_keyval_t* keyval = keyvals[j];
if(strcmp(keyval->key, ORTE_PROC_NUM_AT_STG1) == 0) {
(*cbfunc)(jobid,ORTE_PROC_STATE_AT_STG1);
/* BWB - XXX - FIX ME: this needs to happen when all
are LAUNCHED, before STG1 */
orte_rmgr_urm_wireup_stdin(jobid);
continue;
}
if(strcmp(keyval->key, ORTE_PROC_NUM_AT_STG2) == 0) {
(*cbfunc)(jobid,ORTE_PROC_STATE_AT_STG1);
continue;
}
if(strcmp(keyval->key, ORTE_PROC_NUM_AT_STG3) == 0) {
(*cbfunc)(jobid,ORTE_PROC_STATE_AT_STG3);
continue;
}
if(strcmp(keyval->key, ORTE_PROC_NUM_FINALIZED) == 0) {
(*cbfunc)(jobid,ORTE_PROC_STATE_FINALIZED);
continue;
}
if(strcmp(keyval->key, ORTE_PROC_NUM_TERMINATED) == 0) {
(*cbfunc)(jobid,ORTE_PROC_STATE_TERMINATED);
continue;
}
if(strcmp(keyval->key, ORTE_PROC_NUM_ABORTED) == 0) {
(*cbfunc)(jobid,ORTE_PROC_STATE_ABORTED);
continue;
for(i = 0, k=0; k < data->cnt &&
i < (data->values)->size; i++) {
if (NULL != values[i]) {
k++;
value = values[i];
/* determine the state change */
keyvals = value->keyvals;
for(j=0; j<value->cnt; j++) {
orte_gpr_keyval_t* keyval = keyvals[j];
if(strcmp(keyval->key, ORTE_PROC_NUM_AT_STG1) == 0) {
(*cbfunc)(jobid,ORTE_PROC_STATE_AT_STG1);
/* BWB - XXX - FIX ME: this needs to happen when all
are LAUNCHED, before STG1 */
orte_rmgr_urm_wireup_stdin(jobid);
continue;
}
if(strcmp(keyval->key, ORTE_PROC_NUM_AT_STG2) == 0) {
    /* the stage-2 counter maps to the stage-2 state, in line with the
     * STG1 and STG3 branches around it (this branch previously
     * reported ORTE_PROC_STATE_AT_STG1) */
    (*cbfunc)(jobid,ORTE_PROC_STATE_AT_STG2);
    continue;
}
if(strcmp(keyval->key, ORTE_PROC_NUM_AT_STG3) == 0) {
(*cbfunc)(jobid,ORTE_PROC_STATE_AT_STG3);
continue;
}
if(strcmp(keyval->key, ORTE_PROC_NUM_FINALIZED) == 0) {
(*cbfunc)(jobid,ORTE_PROC_STATE_FINALIZED);
continue;
}
if(strcmp(keyval->key, ORTE_PROC_NUM_TERMINATED) == 0) {
(*cbfunc)(jobid,ORTE_PROC_STATE_TERMINATED);
continue;
}
if(strcmp(keyval->key, ORTE_PROC_NUM_ABORTED) == 0) {
(*cbfunc)(jobid,ORTE_PROC_STATE_ABORTED);
continue;
}
}
}
}

Просмотреть файл

@ -51,37 +51,38 @@ extern char *orte_error_strings[];
* ORTE-wide key names for storing/retrieving data from the registry.
* Subsystem-specific keys will be defined in each=/ subsystem's xxx_types.h file.
*/
#define ORTE_CELLID_KEY "orte-cellid"
#define ORTE_JOBID_KEY "orte-jobid"
#define ORTE_VPID_KEY "orte-vpid"
#define ORTE_NODE_NAME_KEY "orte-node-name"
#define ORTE_NODE_ARCH_KEY "orte-node-arch"
#define ORTE_NODE_STATE_KEY "orte-node-state"
#define ORTE_NODE_SLOTS_KEY "orte-node-slots"
#define ORTE_NODE_SLOTS_ALLOC_KEY "orte-node-slots-alloc"
#define ORTE_NODE_SLOTS_MAX_KEY "orte-node-slots-max"
#define ORTE_NODE_ALLOC_KEY "orte-node-alloc"
#define ORTE_NODE_BOOTPROXY_KEY "orte-node-bootproxy"
#define ORTE_JOB_APP_CONTEXT_KEY "orte-job-app-context"
#define ORTE_JOB_SLOTS_KEY "orte-job-slots" /**< number of procs in job */
#define ORTE_JOB_VPID_START_KEY "orte-job-vpid-start"
#define ORTE_JOB_VPID_RANGE_KEY "orte-job-vpid-range"
#define ORTE_JOB_IOF_KEY "orte-job-iof"
#define ORTE_PROC_NAME_KEY "orte-proc-name"
#define ORTE_PROC_RANK_KEY "orte-proc-rank"
#define ORTE_PROC_PID_KEY "orte-proc-pid"
#define ORTE_PROC_LOCAL_PID_KEY "orte-proc-local-pid"
#define ORTE_PROC_STATE_KEY "orte-proc-state"
#define ORTE_PROC_APP_CONTEXT_KEY "orte-proc-app-context"
#define ORTE_PROC_EXIT_CODE_KEY "orte-proc-exit-code"
#define ORTE_PROC_NUM_ALIVE "orte-proc-num-alive"
#define ORTE_PROC_NUM_ABORTED "orte-proc-num-aborted"
#define ORTE_PROC_NUM_AT_STG1 "orte-proc-num-stg1"
#define ORTE_PROC_NUM_AT_STG2 "orte-proc-num-stg2"
#define ORTE_PROC_NUM_AT_STG3 "orte-proc-num-stg3"
#define ORTE_PROC_NUM_FINALIZED "orte-proc-num-finalized"
#define ORTE_PROC_NUM_TERMINATED "orte-proc-num-terminated"
#define ORTE_CELLID_KEY "orte-cellid"
#define ORTE_JOBGRP_KEY "orte-jobgrp"
#define ORTE_JOBID_KEY "orte-jobid"
#define ORTE_VPID_KEY "orte-vpid"
#define ORTE_NODE_NAME_KEY "orte-node-name"
#define ORTE_NODE_ARCH_KEY "orte-node-arch"
#define ORTE_NODE_STATE_KEY "orte-node-state"
#define ORTE_NODE_SLOTS_KEY "orte-node-slots"
#define ORTE_NODE_SLOTS_ALLOC_KEY "orte-node-slots-alloc"
#define ORTE_NODE_SLOTS_MAX_KEY "orte-node-slots-max"
#define ORTE_NODE_ALLOC_KEY "orte-node-alloc"
#define ORTE_NODE_BOOTPROXY_KEY "orte-node-bootproxy"
#define ORTE_JOB_APP_CONTEXT_KEY "orte-job-app-context"
#define ORTE_JOB_SLOTS_KEY "orte-job-slots" /**< number of procs in job */
#define ORTE_JOB_VPID_START_KEY "orte-job-vpid-start"
#define ORTE_JOB_VPID_RANGE_KEY "orte-job-vpid-range"
#define ORTE_JOB_IOF_KEY "orte-job-iof"
#define ORTE_PROC_NAME_KEY "orte-proc-name"
#define ORTE_PROC_RANK_KEY "orte-proc-rank"
#define ORTE_PROC_PID_KEY "orte-proc-pid"
#define ORTE_PROC_LOCAL_PID_KEY "orte-proc-local-pid"
#define ORTE_PROC_STATE_KEY "orte-proc-state"
#define ORTE_PROC_APP_CONTEXT_KEY "orte-proc-app-context"
#define ORTE_PROC_EXIT_CODE_KEY "orte-proc-exit-code"
#define ORTE_PROC_NUM_ALIVE "orte-proc-num-alive"
#define ORTE_PROC_NUM_ABORTED "orte-proc-num-aborted"
#define ORTE_PROC_NUM_AT_STG1 "orte-proc-num-stg1"
#define ORTE_PROC_NUM_AT_STG2 "orte-proc-num-stg2"
#define ORTE_PROC_NUM_AT_STG3 "orte-proc-num-stg3"
#define ORTE_PROC_NUM_FINALIZED "orte-proc-num-finalized"
#define ORTE_PROC_NUM_TERMINATED "orte-proc-num-terminated"
#define ORTE_PROC_RML_IP_ADDRESS_KEY "orte-proc-rml-ip-addr"
/*
* ORTE-wide names for specific system triggers and subscriptions
*/