diff --git a/src/attribute/attribute_predefined.c b/src/attribute/attribute_predefined.c index e964dec4da..0812553080 100644 --- a/src/attribute/attribute_predefined.c +++ b/src/attribute/attribute_predefined.c @@ -77,7 +77,7 @@ int ompi_attr_create_predefined(void) where the master is supposed to SPAWN the other processes. Perhaps need some integration with the LLM here...? [shrug] */ - universe = ompi_registry.get(OMPI_REGISTRY_OR, "ompi-vm", NULL); + universe = ompi_registry.get(OMPI_REGISTRY_OR, OMPI_RTE_VM_STATUS_SEGMENT, NULL); attr_universe_size = 0; if (0 == ompi_list_get_size(universe)) { attr_universe_size = ompi_comm_size(MPI_COMM_WORLD); diff --git a/src/communicator/comm_dyn.c b/src/communicator/comm_dyn.c index d6ce87165e..ee4e209381 100644 --- a/src/communicator/comm_dyn.c +++ b/src/communicator/comm_dyn.c @@ -24,6 +24,7 @@ #include "mca/pcm/base/base.h" #include "mca/pml/pml.h" #include "mca/ns/base/base.h" +#include "mca/gpr/base/base.h" #include "mca/pml/pml.h" #include "mca/oob/base/base.h" @@ -270,6 +271,7 @@ int ompi_comm_start_processes (char *command, char **argv, int maxprocs, char *tmp, *envvarname, *segment, *my_contact_info; char cwd[MAXPATHLEN]; int rc; + ompi_registry_notify_id_t rc_tag; /* parse the info object */ /* check potentially for: @@ -368,16 +370,17 @@ int ompi_comm_start_processes (char *command, char **argv, int maxprocs, * register to monitor the startup and shutdown processes */ /* setup segment for this job */ - asprintf(&segment, "ompi-job-%d", new_jobid); + asprintf(&segment, "%s-%s", OMPI_RTE_JOB_STATUS_SEGMENT, + ompi_name_server.convert_jobid_to_string(new_jobid)); /* register a synchro on the segment so we get notified when everyone registers */ - rc = ompi_registry.synchro( - OMPI_REGISTRY_SYNCHRO_MODE_LEVEL|OMPI_REGISTRY_SYNCHRO_MODE_ONE_SHOT, - OMPI_REGISTRY_OR, - segment, - NULL, - maxprocs, - ompi_rte_all_procs_registered, NULL); + rc_tag = ompi_registry.synchro( + 
OMPI_REGISTRY_SYNCHRO_MODE_LEVEL|OMPI_REGISTRY_SYNCHRO_MODE_ONE_SHOT, + OMPI_REGISTRY_OR, + segment, + NULL, + maxprocs, + ompi_rte_all_procs_registered, NULL); /* @@ -393,7 +396,11 @@ int ompi_comm_start_processes (char *command, char **argv, int maxprocs, return MPI_ERR_SPAWN; } - + /* + * tell processes okay to start by sending startup msg + */ + ompi_rte_job_startup(new_jobid); + /* * Clean up */ diff --git a/src/include/constants.h b/src/include/constants.h index 30cbaa6875..34e92fc868 100644 --- a/src/include/constants.h +++ b/src/include/constants.h @@ -31,7 +31,14 @@ enum { OMPI_ERR_CONNECTION_REFUSED = -21, /* contact made with process, but it refuses any further communication */ OMPI_ERR_CONNECTION_FAILED = -22, /* message sent, but delivery failed */ OMPI_ERR_TIMEOUT = -23, - OMPI_ERR_INIT = -24 + OMPI_STARTUP_DETECTED = -24, + OMPI_SHUTDOWN_DETECTED = -25, + OMPI_PROC_STARTING = -26, + OMPI_PROC_STOPPED = -27, + OMPI_PROC_TERMINATING = -28, + OMPI_PROC_ALIVE = -29, + OMPI_PROC_RUNNING = -30, + OMPI_PROC_KILLED = -31 }; #endif /* OMPI_CONSTANTS_H */ diff --git a/src/mca/base/mca_base_module_exchange.c b/src/mca/base/mca_base_module_exchange.c index d7877402f3..0d7fe6c726 100644 --- a/src/mca/base/mca_base_module_exchange.c +++ b/src/mca/base/mca_base_module_exchange.c @@ -209,8 +209,8 @@ static void mca_base_modex_registry_callback( void* bptr; int32_t bsize; bool isnew = false; - - /* transfer ownership of registry object to buffer and unpack */ + + /* transfer ownership of registry object to buffer and unpack */ ompi_buffer_init_preallocated(&buffer, value->object, value->object_size); value->object = NULL; value->object_size = 0; @@ -220,7 +220,9 @@ static void mca_base_modex_registry_callback( * Lookup the process. 
*/ ompi_unpack(buffer, &proc_name, 1, OMPI_NAME); + proc = ompi_proc_find_and_add(&proc_name, &isnew); + if(NULL == proc) continue; if(isnew) { @@ -248,6 +250,7 @@ static void mca_base_modex_registry_callback( */ ompi_unpack_string(buffer, &component_name_version); + if(sscanf(component_name_version, "%[^-]-%[^-]-%d-%d", component.mca_type_name, component.mca_component_name, @@ -305,8 +308,8 @@ static void mca_base_modex_registry_callback( static int mca_base_modex_subscribe(ompi_process_name_t* name) { - int rc; - char segment[32]; + ompi_registry_notify_id_t rctag; + char *segment; ompi_list_item_t* item; mca_base_modex_subscription_t* subscription; @@ -324,18 +327,22 @@ static int mca_base_modex_subscribe(ompi_process_name_t* name) OMPI_UNLOCK(&mca_base_modex_lock); /* otherwise - subscribe */ - sprintf(segment, "modex-%X", name->jobid); - rc = ompi_registry.subscribe( + asprintf(&segment, "%s-%s", OMPI_RTE_MODEX_SEGMENT, mca_ns_base_get_jobid_string(name)); + rctag = ompi_registry.subscribe( OMPI_REGISTRY_OR, OMPI_REGISTRY_NOTIFY_ADD_ENTRY|OMPI_REGISTRY_NOTIFY_DELETE_ENTRY| - OMPI_REGISTRY_NOTIFY_MODIFICATION|OMPI_REGISTRY_NOTIFY_PRE_EXISTING, + OMPI_REGISTRY_NOTIFY_MODIFICATION| + OMPI_REGISTRY_NOTIFY_ON_STARTUP|OMPI_REGISTRY_NOTIFY_INCLUDE_STARTUP_DATA| + OMPI_REGISTRY_NOTIFY_ON_SHUTDOWN, segment, NULL, mca_base_modex_registry_callback, NULL); - if(rc != OMPI_SUCCESS) { + if(rctag == OMPI_REGISTRY_NOTIFY_ID_MAX) { ompi_output(0, "mca_base_modex_exchange: " - "ompi_registry.subscribe failed with return code %d\n", rc); + "ompi_registry.subscribe failed with return code %d\n", (int)rctag); + free(segment); + return OMPI_ERROR; } /* add this jobid to our list of subscriptions */ @@ -344,7 +351,8 @@ static int mca_base_modex_subscribe(ompi_process_name_t* name) subscription->jobid = name->jobid; ompi_list_append(&mca_base_modex_subscriptions, &subscription->item); OMPI_UNLOCK(&mca_base_modex_lock); - return rc; + free(segment); + return OMPI_SUCCESS; } @@ -360,15 
+368,15 @@ int mca_base_modex_send( const void *data, size_t size) { - char segment[32]; - char component_name_version[256]; + char *segment; + char *component_name_version; char *keys[3]; ompi_buffer_t buffer; void* bptr; int bsize; int rc; - sprintf(component_name_version, "%s-%s-%d-%d", + asprintf(&component_name_version, "%s-%s-%d-%d", source_component->mca_type_name, source_component->mca_component_name, source_component->mca_component_major_version, @@ -378,20 +386,22 @@ int mca_base_modex_send( keys[1] = component_name_version; keys[2] = NULL; - ompi_buffer_init(&buffer, size+256); + ompi_buffer_init(&buffer, 0); ompi_pack(buffer, ompi_rte_get_self(), 1, OMPI_NAME); ompi_pack_string(buffer, component_name_version); ompi_pack(buffer, &size, 1, OMPI_INT32); ompi_pack(buffer, (void*)data, size, OMPI_BYTE); ompi_buffer_get(buffer, &bptr, &bsize); - sprintf(segment, "modex-%X", mca_oob_name_self.jobid); + asprintf(&segment, "%s-%s", OMPI_RTE_MODEX_SEGMENT, mca_ns_base_get_jobid_string(&mca_oob_name_self)); rc = ompi_registry.put( OMPI_REGISTRY_OVERWRITE, segment, keys, (ompi_registry_object_t)bptr, (ompi_registry_object_size_t)bsize); + free(segment); + free(component_name_version); return rc; } diff --git a/src/mca/gpr/base/Makefile.am b/src/mca/gpr/base/Makefile.am index c161b59962..49ad63749e 100644 --- a/src/mca/gpr/base/Makefile.am +++ b/src/mca/gpr/base/Makefile.am @@ -19,6 +19,23 @@ libmca_gpr_base_la_SOURCES = \ $(headers) \ gpr_base_open.c \ gpr_base_close.c \ + gpr_base_pack_cleanup.c \ + gpr_base_pack_del_index.c \ + gpr_base_unpack_del_index.c \ + gpr_base_pack_dump.c \ + gpr_base_print_dump.c \ + gpr_base_pack_mode_ops.c \ + gpr_base_unpack_mode_ops.c \ + gpr_base_pack_put_get.c \ + gpr_base_unpack_put_get.c \ + gpr_base_pack_startup_shutdown_msg.c \ + gpr_base_unpack_startup_shutdown_msg.c \ + gpr_base_pack_subscribe.c \ + gpr_base_unpack_subscribe.c \ + gpr_base_pack_synchro.c \ + gpr_base_unpack_synchro.c \ + gpr_base_pack_test_internals.c \ + 
gpr_base_unpack_test_internals.c \ gpr_base_select.c # Conditionally install the header files diff --git a/src/mca/gpr/base/base.h b/src/mca/gpr/base/base.h index 1067c53930..1910b2a9fe 100644 --- a/src/mca/gpr/base/base.h +++ b/src/mca/gpr/base/base.h @@ -47,34 +47,141 @@ */ #include "ompi_config.h" +#include +#include +#include +#include +#include +#include #include +#include "include/constants.h" + +#include "threads/mutex.h" +#include "threads/condition.h" + +#include "runtime/runtime.h" + +#include "util/output.h" +#include "util/proc_info.h" +#include "util/sys_info.h" +#include "util/bufpack.h" + #include "class/ompi_list.h" + #include "mca/mca.h" +#include "mca/base/base.h" +#include "mca/base/mca_base_param.h" +#include "mca/oob/base/base.h" + #include "mca/gpr/gpr.h" /* -* Global functions for MCA overall collective open and close -*/ + * Global functions for MCA overall collective open and close + */ #if defined(c_plusplus) || defined(__cplusplus) extern "C" { #endif -OMPI_DECLSPEC int mca_gpr_base_open(void); -OMPI_DECLSPEC int mca_gpr_base_select(bool *allow_multi_user_threads, - bool *have_hidden_threads); -OMPI_DECLSPEC int mca_gpr_base_close(void); + OMPI_DECLSPEC int mca_gpr_base_open(void); + OMPI_DECLSPEC int mca_gpr_base_select(bool *allow_multi_user_threads, + bool *have_hidden_threads); + OMPI_DECLSPEC int mca_gpr_base_close(void); -/* - * globals that might be needed - */ -OMPI_DECLSPEC extern int mca_gpr_base_output; -OMPI_DECLSPEC extern mca_gpr_base_module_t ompi_registry; /* holds selected module's function pointers */ -OMPI_DECLSPEC extern bool mca_gpr_base_selected; -OMPI_DECLSPEC extern ompi_list_t mca_gpr_base_components_available; -OMPI_DECLSPEC extern mca_gpr_base_component_t mca_gpr_base_selected_component; + /* general usage functions */ + OMPI_DECLSPEC int mca_gpr_base_pack_delete_segment(ompi_buffer_t cmd, bool silent, char *segment); + OMPI_DECLSPEC int mca_gpr_base_unpack_delete_segment(ompi_buffer_t buffer); + + OMPI_DECLSPEC 
int mca_gpr_base_pack_delete_object(ompi_buffer_t buffer, bool silent, + ompi_registry_mode_t mode, + char *segment, char **tokens); + OMPI_DECLSPEC int mca_gpr_base_unpack_delete_object(ompi_buffer_t buffer); + + OMPI_DECLSPEC int mca_gpr_base_pack_index(ompi_buffer_t cmd, char *segment); + OMPI_DECLSPEC int mca_gpr_base_unpack_index(ompi_buffer_t cmd, ompi_list_t *return_list); + + OMPI_DECLSPEC int mca_gpr_base_pack_cleanup(ompi_buffer_t cmd, mca_ns_base_jobid_t jobid); + + OMPI_DECLSPEC int mca_gpr_base_pack_synchro(ompi_buffer_t cmd, + ompi_registry_synchro_mode_t synchro_mode, + ompi_registry_mode_t mode, + char *segment, char **tokens, int trigger); + OMPI_DECLSPEC int mca_gpr_base_unpack_synchro(ompi_buffer_t buffer, + ompi_registry_notify_id_t *remote_idtag); + + OMPI_DECLSPEC int mca_gpr_base_pack_cancel_synchro(ompi_buffer_t cmd, + bool silent, + ompi_registry_notify_id_t remote_idtag); + OMPI_DECLSPEC int mca_gpr_base_unpack_cancel_synchro(ompi_buffer_t buffer); + + OMPI_DECLSPEC int mca_gpr_base_pack_subscribe(ompi_buffer_t cmd, + ompi_registry_mode_t mode, + ompi_registry_notify_action_t action, + char *segment, char **tokens); + OMPI_DECLSPEC int mca_gpr_base_unpack_subscribe(ompi_buffer_t buffer, + ompi_registry_notify_id_t *remote_idtag); + + OMPI_DECLSPEC int mca_gpr_base_pack_unsubscribe(ompi_buffer_t cmd, bool silent, + ompi_registry_notify_id_t remote_idtag); + OMPI_DECLSPEC int mca_gpr_base_unpack_unsubscribe(ompi_buffer_t buffer); + + OMPI_DECLSPEC int mca_gpr_base_pack_put(ompi_buffer_t cmd, bool silent, + ompi_registry_mode_t mode, char *segment, + char **tokens, ompi_registry_object_t object, + ompi_registry_object_size_t size); + OMPI_DECLSPEC int mca_gpr_base_unpack_put(ompi_buffer_t buffer); + + OMPI_DECLSPEC int mca_gpr_base_pack_get(ompi_buffer_t cmd, + ompi_registry_mode_t mode, + char *segment, char **tokens); + OMPI_DECLSPEC int mca_gpr_base_unpack_get(ompi_buffer_t buffer, ompi_list_t *return_list); + + OMPI_DECLSPEC int 
mca_gpr_base_pack_dump(ompi_buffer_t cmd); + OMPI_DECLSPEC void mca_gpr_base_print_dump(ompi_buffer_t buffer, int output_id); + + OMPI_DECLSPEC int mca_gpr_base_pack_cleanup_job(ompi_buffer_t buffer, mca_ns_base_jobid_t jobid); + OMPI_DECLSPEC int mca_gpr_base_pack_cleanup_proc(ompi_buffer_t buffer, bool purge, ompi_process_name_t *proc); + + OMPI_DECLSPEC int mca_gpr_base_pack_test_internals(ompi_buffer_t cmd, int level); + OMPI_DECLSPEC int mca_gpr_base_unpack_test_internals(ompi_buffer_t buffer, ompi_list_t *return_list); + + OMPI_DECLSPEC int mca_gpr_base_pack_notify_off(ompi_buffer_t cmd, + ompi_process_name_t *proc, + ompi_registry_notify_id_t sub_number); + OMPI_DECLSPEC int mca_gpr_base_unpack_notify_off(ompi_buffer_t buffer); + + OMPI_DECLSPEC int mca_gpr_base_pack_notify_on(ompi_buffer_t cmd, + ompi_process_name_t *proc, + ompi_registry_notify_id_t sub_number); + OMPI_DECLSPEC int mca_gpr_base_unpack_notify_on(ompi_buffer_t buffer); + + OMPI_DECLSPEC int mca_gpr_base_pack_assume_ownership(ompi_buffer_t cmd, bool silent, + mca_ns_base_jobid_t jobid, char *segment); + OMPI_DECLSPEC int mca_gpr_base_unpack_assume_ownership(ompi_buffer_t buffer); + + OMPI_DECLSPEC int mca_gpr_base_pack_get_startup_msg(ompi_buffer_t cmd, mca_ns_base_jobid_t jobid); + OMPI_DECLSPEC ompi_buffer_t mca_gpr_base_unpack_get_startup_msg(ompi_buffer_t buffer, ompi_list_t *recipients); + + OMPI_DECLSPEC int mca_gpr_base_pack_get_shutdown_msg(ompi_buffer_t cmd, mca_ns_base_jobid_t jobid); + ompi_buffer_t mca_gpr_base_unpack_get_shutdown_msg(ompi_buffer_t buffer, ompi_list_t *recipients); + + OMPI_DECLSPEC int mca_gpr_base_pack_triggers_active_cmd(ompi_buffer_t cmd, mca_ns_base_jobid_t jobid); + OMPI_DECLSPEC int mca_gpr_base_unpack_triggers_active_cmd(ompi_buffer_t cmd); + + OMPI_DECLSPEC int mca_gpr_base_pack_triggers_inactive_cmd(ompi_buffer_t cmd, mca_ns_base_jobid_t jobid); + OMPI_DECLSPEC int mca_gpr_base_unpack_triggers_inactive_cmd(ompi_buffer_t cmd); #if defined(c_plusplus) || 
defined(__cplusplus) } #endif +/* + * globals that might be needed + */ +extern int mca_gpr_base_output; +extern mca_gpr_base_module_t ompi_registry; /* holds selected module's function pointers */ +extern bool mca_gpr_base_selected; +extern ompi_list_t mca_gpr_base_components_available; +extern mca_gpr_base_component_t mca_gpr_base_selected_component; + + #endif diff --git a/src/mca/gpr/base/gpr_base_close.c b/src/mca/gpr/base/gpr_base_close.c index 43097d6903..0dd7eabcef 100644 --- a/src/mca/gpr/base/gpr_base_close.c +++ b/src/mca/gpr/base/gpr_base_close.c @@ -4,11 +4,6 @@ #include "ompi_config.h" -#include - -#include "include/constants.h" -#include "mca/mca.h" -#include "mca/base/base.h" #include "mca/gpr/base/base.h" diff --git a/src/mca/gpr/base/gpr_base_open.c b/src/mca/gpr/base/gpr_base_open.c index 7573bc177a..1da59ad3b4 100644 --- a/src/mca/gpr/base/gpr_base_open.c +++ b/src/mca/gpr/base/gpr_base_open.c @@ -4,12 +4,6 @@ #include "ompi_config.h" -#include "mca/mca.h" -#include "mca/base/base.h" -#include "mca/base/mca_base_param.h" -#include "util/output.h" -#include "util/proc_info.h" -#include "mca/oob/base/base.h" #include "mca/gpr/base/base.h" @@ -49,6 +43,26 @@ OBJ_CLASS_INSTANCE( ompi_registry_value_destructor); /* destructor */ +/* constructor - used to initialize state of compound_cmd_value instance */ +static void ompi_registry_compound_cmd_results_construct(ompi_registry_compound_cmd_results_t* results) +{ + OBJ_CONSTRUCT(&results->data, ompi_list_t); +} + +/* destructor - used to free any resources held by instance */ +static void ompi_registry_compound_cmd_results_destructor(ompi_registry_compound_cmd_results_t* results) +{ + OBJ_DESTRUCT(&results->data); +} + +/* define instance of ompi_class_t */ +OBJ_CLASS_INSTANCE( + ompi_registry_compound_cmd_results_t, /* type name */ + ompi_list_item_t, /* parent "class" name */ + ompi_registry_compound_cmd_results_construct, /* constructor */ + ompi_registry_compound_cmd_results_destructor); /* 
destructor */ + + /* constructor - used to initialize state of index_value instance */ static void ompi_registry_index_value_construct(ompi_registry_index_value_t* value) { @@ -97,36 +111,10 @@ OBJ_CLASS_INSTANCE( ompi_registry_internal_test_results_destructor); /* destructor */ -/* constructor - used to initialize notify message instance */ -static void mca_gpr_notify_request_tracker_construct(mca_gpr_notify_request_tracker_t* req) -{ - req->requestor = NULL; - req->req_tag = 0; - req->callback = NULL; - req->user_tag = NULL; - req->id_tag = MCA_GPR_NOTIFY_ID_MAX; -} - -/* destructor - used to free any resources held by instance */ -static void mca_gpr_notify_request_tracker_destructor(mca_gpr_notify_request_tracker_t* req) -{ - if (NULL != req->requestor) { - free(req->requestor); - } -} - -/* define instance of ompi_class_t */ -OBJ_CLASS_INSTANCE( - mca_gpr_notify_request_tracker_t, /* type name */ - ompi_list_item_t, /* parent "class" name */ - mca_gpr_notify_request_tracker_construct, /* constructor */ - mca_gpr_notify_request_tracker_destructor); /* destructor */ - - /* constructor - used to initialize notify idtag list instance */ static void mca_gpr_idtag_list_construct(mca_gpr_idtag_list_t* req) { - req->id_tag = MCA_GPR_NOTIFY_ID_MAX; + req->id_tag = OMPI_REGISTRY_NOTIFY_ID_MAX; } /* destructor - used to free any resources held by instance */ @@ -145,6 +133,8 @@ OBJ_CLASS_INSTANCE( /* constructor - used to initialize notify message instance */ static void ompi_registry_notify_message_construct(ompi_registry_notify_message_t* msg) { + msg->segment = NULL; + msg->owning_job = 0; OBJ_CONSTRUCT(&msg->data, ompi_list_t); msg->trig_action = OMPI_REGISTRY_NOTIFY_NONE; msg->trig_synchro = OMPI_REGISTRY_SYNCHRO_MODE_NONE; @@ -159,6 +149,10 @@ static void ompi_registry_notify_message_destructor(ompi_registry_notify_message char **tokptr; ompi_registry_value_t *ptr; + if (NULL != msg->segment) { + free(msg->segment); + } + while (NULL != (ptr = 
(ompi_registry_value_t*)ompi_list_remove_first(&msg->data))) { OBJ_RELEASE(ptr); } @@ -189,6 +183,7 @@ mca_gpr_base_module_t ompi_registry; bool mca_gpr_base_selected = false; ompi_list_t mca_gpr_base_components_available; mca_gpr_base_component_t mca_gpr_base_selected_component; +ompi_mutex_t mca_gpr_mutex; /** @@ -197,6 +192,7 @@ mca_gpr_base_component_t mca_gpr_base_selected_component; */ int mca_gpr_base_open(void) { + /* Open up all available components */ if (OMPI_SUCCESS !=
diff --git a/src/mca/gpr/base/gpr_base_pack_cleanup.c b/src/mca/gpr/base/gpr_base_pack_cleanup.c
new file mode 100644
index 0000000000..e5f3485ecc
--- /dev/null
+++ b/src/mca/gpr/base/gpr_base_pack_cleanup.c
@@ -0,0 +1,68 @@
+/*
+ * $HEADER$
+ */
+/** @file:
+ *
+ * The Open MPI general purpose registry - implementation.
+ *
+ */
+
+/*
+ * includes
+ */
+
+#include "ompi_config.h"
+
+#include "mca/gpr/base/base.h"
+
+/*
+ * Pack a "cleanup job" command: the command flag followed by the jobid.
+ * Returns OMPI_SUCCESS, or OMPI_ERROR if either pack operation fails.
+ */
+int mca_gpr_base_pack_cleanup_job(ompi_buffer_t buffer, mca_ns_base_jobid_t jobid)
+{
+    mca_gpr_cmd_flag_t command;
+
+    command = MCA_GPR_CLEANUP_JOB_CMD;
+
+    if (OMPI_SUCCESS != ompi_pack(buffer, &command, 1, MCA_GPR_OOB_PACK_CMD)) {
+        return OMPI_ERROR;
+    }
+
+    if (OMPI_SUCCESS != ompi_pack(buffer, &jobid, 1, MCA_GPR_OOB_PACK_JOBID)) {
+        return OMPI_ERROR;
+    }
+
+    return OMPI_SUCCESS;
+}
+
+
+/*
+ * Pack a "cleanup proc" command: the command flag, the purge flag and the
+ * process name, in that order.
+ * Returns OMPI_SUCCESS, or OMPI_ERROR if any pack operation fails.
+ */
+int mca_gpr_base_pack_cleanup_proc(ompi_buffer_t buffer, bool purge, ompi_process_name_t *proc)
+{
+    mca_gpr_cmd_flag_t command;
+    int8_t tmp_bool;
+
+    command = MCA_GPR_CLEANUP_PROC_CMD;
+
+    if (OMPI_SUCCESS != ompi_pack(buffer, &command, 1, MCA_GPR_OOB_PACK_CMD)) {
+        return OMPI_ERROR;
+    }
+
+    /* pack the flag through an int8_t, as the other gpr pack routines do,
+     * so the packed width does not depend on sizeof(bool) */
+    tmp_bool = (int8_t)purge;
+    if (OMPI_SUCCESS != ompi_pack(buffer, &tmp_bool, 1, MCA_GPR_OOB_PACK_BOOL)) {
+        return OMPI_ERROR;
+    }
+
+    if (OMPI_SUCCESS != ompi_pack(buffer, proc, 1, MCA_GPR_OOB_PACK_NAME)) {
+        return OMPI_ERROR;
+    }
+
+    return OMPI_SUCCESS;
+}
diff --git a/src/mca/gpr/base/gpr_base_pack_del_index.c b/src/mca/gpr/base/gpr_base_pack_del_index.c
new file mode 100644
index 0000000000..991e5d83b3
--- /dev/null
+++
b/src/mca/gpr/base/gpr_base_pack_del_index.c @@ -0,0 +1,116 @@ +/* + * $HEADER$ + */ +/** @file: + * + */ + +#include "ompi_config.h" + +#include "mca/gpr/base/base.h" + +int mca_gpr_base_pack_delete_segment(ompi_buffer_t cmd, bool silent, char *segment) +{ + mca_gpr_cmd_flag_t command; + int8_t tmp_bool; + + command = MCA_GPR_DELETE_SEGMENT_CMD; + + if (OMPI_SUCCESS != ompi_pack(cmd, &command, 1, MCA_GPR_OOB_PACK_CMD)) { + return OMPI_ERROR; + } + + tmp_bool = (int8_t)silent; + if (OMPI_SUCCESS != ompi_pack(cmd, &tmp_bool, 1, MCA_GPR_OOB_PACK_BOOL)) { + return OMPI_ERROR; + } + + if (OMPI_SUCCESS != ompi_pack_string(cmd, segment)) { + return OMPI_ERROR; + } + + return OMPI_SUCCESS; +} + + +int mca_gpr_base_pack_delete_object(ompi_buffer_t cmd, bool silent, + ompi_registry_mode_t mode, + char *segment, char **tokens) +{ + mca_gpr_cmd_flag_t command; + char **tokptr; + int32_t num_tokens; + int i; + int8_t tmp_bool; + + command = MCA_GPR_DELETE_OBJECT_CMD; + + if (OMPI_SUCCESS != ompi_pack(cmd, &command, 1, MCA_GPR_OOB_PACK_CMD)) { + return OMPI_ERROR; + } + + tmp_bool = (int8_t)silent; + if (OMPI_SUCCESS != ompi_pack(cmd, &tmp_bool, 1, MCA_GPR_OOB_PACK_BOOL)) { + return OMPI_ERROR; + } + + if (OMPI_SUCCESS != ompi_pack(cmd, &mode, 1, MCA_GPR_OOB_PACK_MODE)) { + return OMPI_ERROR; + } + + if (OMPI_SUCCESS != ompi_pack_string(cmd, segment)) { + return OMPI_ERROR; + } + + /* compute number of tokens */ + tokptr = tokens; + num_tokens = 0; + while (NULL != *tokptr) { + num_tokens++; + tokptr++; + } + + if (OMPI_SUCCESS != ompi_pack(cmd, &num_tokens, 1, OMPI_INT32)) { + return OMPI_ERROR; + } + + tokptr = tokens; + for (i=0; i -#include -#include -#ifdef HAVE_UNISTD_H -#include -#endif -#ifdef HAVE_SYS_TYPES_H -#include -#endif - -#include "runtime/runtime.h" -#include "util/output.h" -#include "util/proc_info.h" -#include "mca/mca.h" -#include "mca/base/base.h" #include "mca/gpr/base/base.h" diff --git a/src/mca/gpr/base/gpr_base_unpack_del_index.c 
b/src/mca/gpr/base/gpr_base_unpack_del_index.c new file mode 100644 index 0000000000..822899a51e --- /dev/null +++ b/src/mca/gpr/base/gpr_base_unpack_del_index.c @@ -0,0 +1,77 @@ +/* + * $HEADER$ + */ +/** @file: + * + */ + +#include "ompi_config.h" + +#include "mca/gpr/base/base.h" + + +int mca_gpr_base_unpack_delete_segment(ompi_buffer_t buffer) +{ + mca_gpr_cmd_flag_t command; + int32_t response; + + if ((OMPI_SUCCESS != ompi_unpack(buffer, &command, 1, MCA_GPR_OOB_PACK_CMD)) + || (MCA_GPR_DELETE_SEGMENT_CMD != command)) { + return OMPI_ERROR; + } + + if (OMPI_SUCCESS != ompi_unpack(buffer, &response, 1, OMPI_INT32)) { + return OMPI_ERROR; + } else { + return (int)response; + } +} + + +int mca_gpr_base_unpack_delete_object(ompi_buffer_t buffer) +{ + mca_gpr_cmd_flag_t command; + int32_t response; + + if ((OMPI_SUCCESS != ompi_unpack(buffer, &command, 1, MCA_GPR_OOB_PACK_CMD)) + || (MCA_GPR_DELETE_OBJECT_CMD != command)) { + return OMPI_ERROR; + } + + if (OMPI_SUCCESS != ompi_unpack(buffer, &response, 1, OMPI_INT32)) { + return OMPI_ERROR; + } else { + return (int)response; + } +} + + +int mca_gpr_base_unpack_index(ompi_buffer_t buffer, ompi_list_t *return_list) +{ + mca_gpr_cmd_flag_t command; + int32_t num_responses; + ompi_registry_index_value_t *newptr; + char *string1; + int i; + + if ((OMPI_SUCCESS != ompi_unpack(buffer, &command, 1, MCA_GPR_OOB_PACK_CMD)) + || (MCA_GPR_INDEX_CMD != command)) { + return OMPI_ERROR; + } + + if ((OMPI_SUCCESS != ompi_unpack(buffer, &num_responses, 1, OMPI_INT32)) || + (0 >= num_responses)) { + return OMPI_ERROR; + } + + for (i=0; i ompi_unpack_string(buffer, &string1)) { + return OMPI_ERROR; + } + newptr = OBJ_NEW(ompi_registry_index_value_t); + newptr->token = strdup(string1); + ompi_list_append(return_list, &newptr->item); + } + + return OMPI_SUCCESS; +} diff --git a/src/mca/gpr/base/gpr_base_unpack_mode_ops.c b/src/mca/gpr/base/gpr_base_unpack_mode_ops.c new file mode 100644 index 0000000000..7262dcdfee --- /dev/null +++ 
b/src/mca/gpr/base/gpr_base_unpack_mode_ops.c
@@ -0,0 +1,72 @@
+/*
+ * $HEADER$
+ */
+/** @file:
+ *
+ * The Open MPI general purpose registry - implementation.
+ *
+ */
+
+/*
+ * includes
+ */
+
+#include "ompi_config.h"
+
+#include "mca/gpr/base/base.h"
+
+/* Placeholder: nothing is unpacked from cmd; always reports success. */
+int mca_gpr_base_unpack_triggers_active_cmd(ompi_buffer_t cmd)
+{
+    return OMPI_SUCCESS;
+}
+
+/*
+ * Placeholder: nothing is unpacked from cmd; always reports success.
+ * Named to match the prototype declared in mca/gpr/base/base.h.
+ */
+int mca_gpr_base_unpack_triggers_inactive_cmd(ompi_buffer_t cmd)
+{
+    return OMPI_SUCCESS;
+}
+
+/* Original (un-suffixed) name, retained so existing callers still link. */
+int mca_gpr_base_unpack_triggers_inactive(ompi_buffer_t cmd)
+{
+    return mca_gpr_base_unpack_triggers_inactive_cmd(cmd);
+}
+
+/* Placeholder: nothing is unpacked from cmd; always reports success. */
+int mca_gpr_base_unpack_notify_on(ompi_buffer_t cmd)
+{
+    return OMPI_SUCCESS;
+}
+
+/* Placeholder: nothing is unpacked from cmd; always reports success. */
+int mca_gpr_base_unpack_notify_off(ompi_buffer_t cmd)
+{
+    return OMPI_SUCCESS;
+}
+
+/*
+ * Unpack the reply to an "assume ownership" command.
+ * The buffer must start with MCA_GPR_ASSUME_OWNERSHIP_CMD followed by an
+ * int32 response code, which is returned as-is; OMPI_ERROR is returned when
+ * either field cannot be unpacked or the command flag does not match.
+ */
+int mca_gpr_base_unpack_assume_ownership(ompi_buffer_t cmd)
+{
+    mca_gpr_cmd_flag_t command;
+    int32_t response;
+
+    if ((OMPI_SUCCESS != ompi_unpack(cmd, &command, 1, MCA_GPR_OOB_PACK_CMD)) ||
+        (MCA_GPR_ASSUME_OWNERSHIP_CMD != command)) {
+        return OMPI_ERROR;
+    }
+
+    if (OMPI_SUCCESS != ompi_unpack(cmd, &response, 1, OMPI_INT32)) {
+        return OMPI_ERROR;
+    }
+
+    return (int)response;
+}
diff --git a/src/mca/gpr/base/gpr_base_unpack_put_get.c b/src/mca/gpr/base/gpr_base_unpack_put_get.c
new file mode 100644
index 0000000000..e6fd36d14f
--- /dev/null
+++ b/src/mca/gpr/base/gpr_base_unpack_put_get.c
@@ -0,0 +1,71 @@
+/*
+ * $HEADER$
+ */
+/** @file:
+ *
+ * The Open MPI general purpose registry - base unpack functions.
+ * + */ + +/* + * includes + */ + +#include "ompi_config.h" + +#include "mca/gpr/base/base.h" + +int mca_gpr_base_unpack_put(ompi_buffer_t buffer) +{ + mca_gpr_cmd_flag_t command; + int32_t response; + + if ((OMPI_SUCCESS != ompi_unpack(buffer, &command, 1, MCA_GPR_OOB_PACK_CMD)) + || (MCA_GPR_PUT_CMD != command)) { + return OMPI_ERROR; + } + + if (OMPI_SUCCESS != ompi_unpack(buffer, &response, 1, OMPI_INT32)) { + return OMPI_ERROR; + } else { + return (int)response; + } + +} + + +int mca_gpr_base_unpack_get(ompi_buffer_t buffer, ompi_list_t *returned_list) +{ + mca_gpr_cmd_flag_t command; + int32_t object_size, num_responses; + ompi_registry_value_t *newptr; + ompi_registry_object_t *object; + int i; + + if ((OMPI_SUCCESS != ompi_unpack(buffer, &command, 1, MCA_GPR_OOB_PACK_CMD)) + || (MCA_GPR_GET_CMD != command)) { + return OMPI_ERROR; + } + + if ((OMPI_SUCCESS != ompi_unpack(buffer, &num_responses, 1, OMPI_INT32)) || + (0 >= num_responses)) { + return OMPI_ERROR; + } + + for (i=0; iobject_size = object_size; + newptr->object = object; + ompi_list_append(returned_list, &newptr->item); + } + + return OMPI_SUCCESS; +} diff --git a/src/mca/gpr/base/gpr_base_unpack_startup_shutdown_msg.c b/src/mca/gpr/base/gpr_base_unpack_startup_shutdown_msg.c new file mode 100644 index 0000000000..bbcdd8d514 --- /dev/null +++ b/src/mca/gpr/base/gpr_base_unpack_startup_shutdown_msg.c @@ -0,0 +1,101 @@ +/* -*- C -*- + * + * $HEADER$ + */ +/** @file: + * + * The Open MPI General Purpose Registry - unpack functions + * + */ + +/* + * includes + */ +#include "ompi_config.h" + +#include "mca/gpr/base/base.h" + +ompi_buffer_t +mca_gpr_base_unpack_get_startup_msg(ompi_buffer_t buffer, + ompi_list_t *recipients) +{ + mca_gpr_cmd_flag_t command; + int32_t num_recipients, i; + ompi_process_name_t proc; + ompi_name_server_namelist_t *peer; + ompi_buffer_t msg; + void *addr; + int size; + + if ((OMPI_SUCCESS != ompi_unpack(buffer, &command, 1, MCA_GPR_OOB_PACK_CMD)) + || 
(MCA_GPR_GET_STARTUP_MSG_CMD != command)) { + return NULL; + } + + if (OMPI_SUCCESS != ompi_unpack(buffer, &num_recipients, 1, OMPI_INT32)) { + return NULL; + } + + for (i=0; iname = ompi_name_server.copy_process_name(&proc);; + ompi_list_append(recipients, &peer->item); + } + + if (OMPI_SUCCESS != ompi_buffer_init(&msg, 0)) { + return NULL; + } + + ompi_buffer_get(buffer, &addr, &size); + if (0 < size) { + ompi_pack(msg, addr, size, OMPI_BYTE); + } + + return msg; +} + + +ompi_buffer_t +mca_gpr_base_unpack_get_shutdown_msg(ompi_buffer_t buffer, + ompi_list_t *recipients) +{ + mca_gpr_cmd_flag_t command; + int32_t num_recipients, i; + ompi_process_name_t proc; + ompi_name_server_namelist_t *peer; + ompi_buffer_t msg; + void *addr; + int size; + + if ((OMPI_SUCCESS != ompi_unpack(buffer, &command, 1, MCA_GPR_OOB_PACK_CMD)) + || (MCA_GPR_GET_SHUTDOWN_MSG_CMD != command)) { + return NULL; + } + + if (OMPI_SUCCESS != ompi_unpack(buffer, &num_recipients, 1, OMPI_INT32)) { + return NULL; + } + + for (i=0; iname = ompi_name_server.copy_process_name(&proc);; + ompi_list_append(recipients, &peer->item); + } + + if (OMPI_SUCCESS != ompi_buffer_init(&msg, 0)) { + return NULL; + } + + ompi_buffer_get(buffer, &addr, &size); + if (0 < size) { + ompi_pack(msg, addr, size, OMPI_BYTE); + } + + return msg; +} diff --git a/src/mca/gpr/base/gpr_base_unpack_subscribe.c b/src/mca/gpr/base/gpr_base_unpack_subscribe.c new file mode 100644 index 0000000000..7f01484859 --- /dev/null +++ b/src/mca/gpr/base/gpr_base_unpack_subscribe.c @@ -0,0 +1,51 @@ +/* + * $HEADER$ + */ +/** @file: + * + * The Open MPI general purpose registry - unpack functions. 
+ *
+ */
+
+/*
+ * includes
+ */
+
+#include "ompi_config.h"
+
+#include "mca/gpr/base/base.h"
+
+/* Unpack a subscribe reply: verify the command flag, then read the notify id into *remote_idtag. */
+int mca_gpr_base_unpack_subscribe(ompi_buffer_t buffer, ompi_registry_notify_id_t *remote_idtag)
+{
+    mca_gpr_cmd_flag_t cmd_flag;
+
+    if (OMPI_SUCCESS != ompi_unpack(buffer, &cmd_flag, 1, MCA_GPR_OOB_PACK_CMD)) {
+        return OMPI_ERROR;
+    }
+    if (MCA_GPR_SUBSCRIBE_CMD != cmd_flag) {
+        return OMPI_ERROR;
+    }
+    if (OMPI_SUCCESS != ompi_unpack(buffer, remote_idtag, 1, MCA_GPR_OOB_PACK_NOTIFY_ID)) {
+        return OMPI_ERROR;
+    }
+    return OMPI_SUCCESS;
+}
+
+/* Unpack an unsubscribe reply: verify the command flag, then return the int32 response it carries. */
+int mca_gpr_base_unpack_unsubscribe(ompi_buffer_t buffer)
+{
+    mca_gpr_cmd_flag_t cmd_flag;
+    int32_t reply;
+
+    if (OMPI_SUCCESS != ompi_unpack(buffer, &cmd_flag, 1, MCA_GPR_OOB_PACK_CMD)) {
+        return OMPI_ERROR;
+    }
+    if (MCA_GPR_UNSUBSCRIBE_CMD != cmd_flag) {
+        return OMPI_ERROR;
+    }
+    if (OMPI_SUCCESS != ompi_unpack(buffer, &reply, 1, OMPI_INT32)) {
+        return OMPI_ERROR;
+    }
+    return (int)reply;
+}
diff --git a/src/mca/gpr/base/gpr_base_unpack_synchro.c b/src/mca/gpr/base/gpr_base_unpack_synchro.c
new file mode 100644
index 0000000000..b27d602428
--- /dev/null
+++ b/src/mca/gpr/base/gpr_base_unpack_synchro.c
@@ -0,0 +1,52 @@
+/*
+ * $HEADER$
+ */
+/** @file:
+ *
+ * The Open MPI general purpose registry - unpack functions.
+ *
+ */
+
+/*
+ * includes
+ */
+
+#include "ompi_config.h"
+
+#include "mca/gpr/base/base.h"
+
+/* Unpack a synchro reply: verify the command flag, then read the notify id into *remote_idtag. */
+int mca_gpr_base_unpack_synchro(ompi_buffer_t buffer, ompi_registry_notify_id_t *remote_idtag)
+{
+    mca_gpr_cmd_flag_t command;
+
+    /* NOTE(review): the flag checked is the SUBSCRIBE command - confirm synchro replies really reuse it */
+    if ((OMPI_SUCCESS != ompi_unpack(buffer, &command, 1, MCA_GPR_OOB_PACK_CMD))
+        || (MCA_GPR_SUBSCRIBE_CMD != command)) {
+        return OMPI_ERROR;
+    }
+
+    if (OMPI_SUCCESS != ompi_unpack(buffer, remote_idtag, 1, MCA_GPR_OOB_PACK_NOTIFY_ID)) {
+        return OMPI_ERROR;
+    }
+    /* no response code is unpacked from this reply, so report success explicitly */
+    return OMPI_SUCCESS;
+}
+
+/* Unpack a cancel-synchro reply: check the command flag (UNSUBSCRIBE, as written), then return its int32 response. */
+int mca_gpr_base_unpack_cancel_synchro(ompi_buffer_t buffer)
+{
+    mca_gpr_cmd_flag_t command;
+    int32_t response;
+
+    if ((OMPI_SUCCESS != ompi_unpack(buffer, &command, 1, MCA_GPR_OOB_PACK_CMD))
+        || (MCA_GPR_UNSUBSCRIBE_CMD != command)) {
+        return OMPI_ERROR;
+    }
+
+    if (OMPI_SUCCESS != ompi_unpack(buffer, &response, 1, OMPI_INT32)) {
+        return OMPI_ERROR;
+    }
+
+    return (int)response;
+}
diff --git a/src/mca/gpr/base/gpr_base_unpack_test_internals.c b/src/mca/gpr/base/gpr_base_unpack_test_internals.c
new file mode 100644
index 0000000000..6b3d0ecb34
--- /dev/null
+++ b/src/mca/gpr/base/gpr_base_unpack_test_internals.c
@@ -0,0 +1,50 @@
+/*
+ * $HEADER$
+ */
+/** @file:
+ *
+ * The Open MPI general purpose registry - implementation.
+ *
+ */
+
+/*
+ * includes
+ */
+
+#include "ompi_config.h"
+
+#include "mca/gpr/base/base.h"
+/* Unpack a test-internals reply, appending one (test, message) result item per response to test_results. */
+int mca_gpr_base_unpack_test_internals(ompi_buffer_t buffer, ompi_list_t *test_results)
+{
+    char *string1=NULL, *string2=NULL;
+    int i;
+    int32_t num_responses;
+    ompi_registry_internal_test_results_t *newptr=NULL;
+    mca_gpr_cmd_flag_t command;
+
+    if ((OMPI_SUCCESS != ompi_unpack(buffer, &command, 1, MCA_GPR_OOB_PACK_CMD))
+        || (MCA_GPR_TEST_INTERNALS_CMD != command)) {
+        return OMPI_ERROR;
+    }
+
+    if ((OMPI_SUCCESS != ompi_unpack(buffer, &num_responses, 1, OMPI_INT32)) ||
+        (0 >= num_responses)) {
+        return OMPI_ERROR;
+    }
+    /* hand ompi_unpack_string() the address of a real char*, as mca_gpr_base_unpack_index does */
+    for (i=0; i<num_responses; i++) {
+        if (0 > ompi_unpack_string(buffer, &string1)) {
+            return OMPI_ERROR;
+        }
+        if (0 > ompi_unpack_string(buffer, &string2)) {
+            return OMPI_ERROR;
+        }
+        newptr = OBJ_NEW(ompi_registry_internal_test_results_t);
+        newptr->test = strdup(string1);
+        newptr->message = strdup(string2);
+        ompi_list_append(test_results, &newptr->item);
+    }
+
+    return OMPI_SUCCESS;
+}
diff --git a/src/mca/gpr/gpr.h b/src/mca/gpr/gpr.h index 7dc9dd6fb1..49c4c067a3 100644 --- a/src/mca/gpr/gpr.h +++ b/src/mca/gpr/gpr.h @@ -28,6 +28,7 @@ #include "include/constants.h" #include "class/ompi_list.h" #include "util/bufpack.h" +#include "runtime/runtime_types.h" #include "mca/mca.h" #include "mca/oob/base/base.h" @@ -39,42 +40,50 @@ extern "C" { /** Define the notification actions for the subscription system */ -#define OMPI_REGISTRY_NOTIFY_NONE 0x0000 /**< Null case */ -#define OMPI_REGISTRY_NOTIFY_MODIFICATION 0x0001 /**< Notifies subscriber when object modified */ -#define OMPI_REGISTRY_NOTIFY_ADD_SUBSCRIBER 0x0002 /**< Notifies subscriber when another subscriber added */ -#define OMPI_REGISTRY_NOTIFY_DELETE_ENTRY 0x0004 /**< Notifies subscriber when object deleted */ -#define OMPI_REGISTRY_NOTIFY_ADD_ENTRY 0x0008 /**< Notifies subscriber when object added */ -#define OMPI_REGISTRY_NOTIFY_PRE_EXISTING 0x0010 /**< Send all pre-existing entries that meet conditions */
-#define OMPI_REGISTRY_NOTIFY_ALL 0xffff /**< Notifies subscriber upon any action */ +#define OMPI_REGISTRY_NOTIFY_NONE (uint16_t)0x0000 /**< Null case */ +#define OMPI_REGISTRY_NOTIFY_MODIFICATION (uint16_t)0x0001 /**< Notifies subscriber when object modified */ +#define OMPI_REGISTRY_NOTIFY_ADD_SUBSCRIBER (uint16_t)0x0002 /**< Notifies subscriber when another subscriber added */ +#define OMPI_REGISTRY_NOTIFY_DELETE_ENTRY (uint16_t)0x0004 /**< Notifies subscriber when object deleted */ +#define OMPI_REGISTRY_NOTIFY_ADD_ENTRY (uint16_t)0x0008 /**< Notifies subscriber when object added */ +#define OMPI_REGISTRY_NOTIFY_ON_STARTUP (uint16_t)0x0010 /**< Provide me with startup message - no data */ +#define OMPI_REGISTRY_NOTIFY_ON_SHUTDOWN (uint16_t)0x0020 /**< Provide me with shutdown message - no data */ +#define OMPI_REGISTRY_NOTIFY_PRE_EXISTING (uint16_t)0x0040 /**< Provide list of all pre-existing data */ +#define OMPI_REGISTRY_NOTIFY_INCLUDE_STARTUP_DATA (uint16_t)0x0080 /**< Provide data with startup message */ +#define OMPI_REGISTRY_NOTIFY_INCLUDE_SHUTDOWN_DATA (uint16_t)0x0100 /**< Provide data with shutdown message */ +#define OMPI_REGISTRY_NOTIFY_ONE_SHOT (uint16_t)0x0200 /**< Only trigger once - then delete subscription */ +#define OMPI_REGISTRY_NOTIFY_ALL (uint16_t)0xffff /**< Notifies subscriber upon any action */ typedef uint16_t ompi_registry_notify_action_t; -typedef uint32_t mca_gpr_notify_id_t; -#define MCA_GPR_NOTIFY_ID_MAX UINT32_MAX +typedef uint32_t ompi_registry_notify_id_t; +#define OMPI_REGISTRY_NOTIFY_ID_MAX UINT32_MAX /* * Define synchro mode flags */ -#define OMPI_REGISTRY_SYNCHRO_MODE_NONE 0x00 /**< No synchronization */ -#define OMPI_REGISTRY_SYNCHRO_MODE_ASCENDING 0x01 /**< Notify when trigger is reached, ascending mode */ -#define OMPI_REGISTRY_SYNCHRO_MODE_DESCENDING 0x02 /**< Notify when trigger is reached, descending mode */ -#define OMPI_REGISTRY_SYNCHRO_MODE_LEVEL 0x04 /**< Notify when trigger is reached, regardless of direction */ 
-#define OMPI_REGISTRY_SYNCHRO_MODE_GT_EQUAL 0x08 /**< Notify if level greater than or equal */ -#define OMPI_REGISTRY_SYNCHRO_MODE_LT_EQUAL 0x10 /**< Notify if level less than or equal */ -#define OMPI_REGISTRY_SYNCHRO_MODE_CONTINUOUS 0x80 /**< Notify whenever conditions are met */ -#define OMPI_REGISTRY_SYNCHRO_MODE_ONE_SHOT 0x81 /**< Fire once, then terminate synchro command */ - -typedef uint16_t ompi_registry_synchro_mode_t; +#define OMPI_REGISTRY_SYNCHRO_MODE_NONE (uint8_t)0x00 /**< No synchronization */ +#define OMPI_REGISTRY_SYNCHRO_MODE_ASCENDING (uint8_t)0x01 /**< Notify when trigger is reached, ascending mode */ +#define OMPI_REGISTRY_SYNCHRO_MODE_DESCENDING (uint8_t)0x02 /**< Notify when trigger is reached, descending mode */ +#define OMPI_REGISTRY_SYNCHRO_MODE_LEVEL (uint8_t)0x04 /**< Notify when trigger is reached, regardless of direction */ +#define OMPI_REGISTRY_SYNCHRO_MODE_GT_EQUAL (uint8_t)0x08 /**< Notify if level greater than or equal */ +#define OMPI_REGISTRY_SYNCHRO_MODE_LT_EQUAL (uint8_t)0x10 /**< Notify if level less than or equal */ +#define OMPI_REGISTRY_SYNCHRO_MODE_CONTINUOUS (uint8_t)0x80 /**< Notify whenever conditions are met */ +#define OMPI_REGISTRY_SYNCHRO_MODE_ONE_SHOT (uint8_t)0x81 /**< Fire once, then terminate synchro command */ +#define OMPI_REGISTRY_SYNCHRO_MODE_STARTUP (uint8_t)0x82 /**< Indicates associated with application startup */ +#define OMPI_REGISTRY_SYNCHRO_MODE_SHUTDOWN (uint8_t)0x84 /**< Indicates associated with application shutdown */ +typedef uint8_t ompi_registry_synchro_mode_t; /** Return value for notify requests */ struct ompi_registry_notify_message_t { - ompi_list_t data; /**< List of data objects */ - ompi_registry_notify_action_t trig_action; - ompi_registry_synchro_mode_t trig_synchro; - uint32_t num_tokens; - char **tokens; + char *segment; /**< Name of originating segment */ + mca_ns_base_jobid_t owning_job; /**< Job that owns that segment */ + ompi_list_t data; /**< List of data objects */ + 
ompi_registry_notify_action_t trig_action; /**< If subscription, action that triggered message */ + ompi_registry_synchro_mode_t trig_synchro; /**< If synchro, action that triggered message */ + uint32_t num_tokens; /**< Number of tokens in subscription/synchro */ + char **tokens; /**< List of tokens in subscription/synchro */ }; typedef struct ompi_registry_notify_message_t ompi_registry_notify_message_t; @@ -85,35 +94,50 @@ typedef void (*ompi_registry_notify_cb_fn_t)(ompi_registry_notify_message_t *not -/** Define the mode bit-masks for registry operations. +/** Define the addressing mode bit-masks for registry operations. */ -#define OMPI_REGISTRY_NONE 0x0000 /**< None */ -#define OMPI_REGISTRY_OVERWRITE 0x0001 /**< Overwrite Permission */ -#define OMPI_REGISTRY_AND 0x0002 /**< AND tokens together for search results */ -#define OMPI_REGISTRY_OR 0x0004 /**< OR tokens for search results */ -#define OMPI_REGISTRY_XAND 0x0008 /**< All tokens required, nothing else allowed */ -#define OMPI_REGISTRY_XOR 0x0010 /**< Any one of the tokens required, nothing else allowed */ +#define OMPI_REGISTRY_NONE (uint16_t)0x0000 /**< None */ +#define OMPI_REGISTRY_OVERWRITE (uint16_t)0x0001 /**< Overwrite Permission */ +#define OMPI_REGISTRY_AND (uint16_t)0x0002 /**< AND tokens together for search results */ +#define OMPI_REGISTRY_OR (uint16_t)0x0004 /**< OR tokens for search results */ +#define OMPI_REGISTRY_XAND (uint16_t)0x0008 /**< All tokens required, nothing else allowed */ +#define OMPI_REGISTRY_XOR (uint16_t)0x0010 /**< Any one of the tokens required, nothing else allowed */ typedef uint16_t ompi_registry_mode_t; +/** Define flag values for requesting return data from compound commands + */ + +#define OMPI_REGISTRY_RETURN_REQUESTED true /**< Return information from compound command */ +#define OMPI_REGISTRY_NO_RETURN_REQUESTED false /**< Do not return information from compound cmd */ + /* * Define flag values for remote commands - only used internally */ -#define 
MCA_GPR_DELETE_SEGMENT_CMD 0x0001 -#define MCA_GPR_PUT_CMD 0x0002 -#define MCA_GPR_DELETE_OBJECT_CMD 0x0004 -#define MCA_GPR_INDEX_CMD 0x0008 -#define MCA_GPR_SUBSCRIBE_CMD 0x0010 -#define MCA_GPR_UNSUBSCRIBE_CMD 0x0020 -#define MCA_GPR_SYNCHRO_CMD 0x0040 -#define MCA_GPR_CANCEL_SYNCHRO_CMD 0x0080 -#define MCA_GPR_GET_CMD 0x0100 -#define MCA_GPR_TEST_INTERNALS_CMD 0x0200 -#define MCA_GPR_NOTIFY_CMD 0x0400 /**< Indicates a notify message */ -#define MCA_GPR_RTE_REGISTER_CMD 0x0800 -#define MCA_GPR_RTE_UNREGISTER_CMD 0x1000 -#define MCA_GPR_ERROR 0xffff +#define MCA_GPR_DELETE_SEGMENT_CMD (uint16_t)0x0001 +#define MCA_GPR_PUT_CMD (uint16_t)0x0002 +#define MCA_GPR_DELETE_OBJECT_CMD (uint16_t)0x0004 +#define MCA_GPR_INDEX_CMD (uint16_t)0x0008 +#define MCA_GPR_SUBSCRIBE_CMD (uint16_t)0x0010 +#define MCA_GPR_UNSUBSCRIBE_CMD (uint16_t)0x0020 +#define MCA_GPR_SYNCHRO_CMD (uint16_t)0x0040 +#define MCA_GPR_CANCEL_SYNCHRO_CMD (uint16_t)0x0080 +#define MCA_GPR_GET_CMD (uint16_t)0x0100 +#define MCA_GPR_TEST_INTERNALS_CMD (uint16_t)0x0200 +#define MCA_GPR_NOTIFY_CMD (uint16_t)0x0400 /**< Indicates a notify message */ +#define MCA_GPR_DUMP_CMD (uint16_t)0x2000 +#define MCA_GPR_ASSUME_OWNERSHIP_CMD (uint16_t)0x4000 +#define MCA_GPR_NOTIFY_ON_CMD (uint16_t)0x8000 +#define MCA_GPR_NOTIFY_OFF_CMD (uint16_t)0x8001 +#define MCA_GPR_COMPOUND_CMD (uint16_t)0x8010 +#define MCA_GPR_GET_STARTUP_MSG_CMD (uint16_t)0x8020 +#define MCA_GPR_GET_SHUTDOWN_MSG_CMD (uint16_t)0x8040 +#define MCA_GPR_TRIGGERS_ACTIVE_CMD (uint16_t)0x8080 +#define MCA_GPR_TRIGGERS_INACTIVE_CMD (uint16_t)0x8100 +#define MCA_GPR_CLEANUP_JOB_CMD (uint16_t)0x8200 +#define MCA_GPR_CLEANUP_PROC_CMD (uint16_t)0x8400 +#define MCA_GPR_ERROR (uint16_t)0xffff typedef uint16_t mca_gpr_cmd_flag_t; @@ -123,12 +147,17 @@ typedef uint16_t mca_gpr_cmd_flag_t; /* CAUTION - any changes here must also change corresponding * typedefs above */ -#define MCA_GPR_OOB_PACK_CMD OMPI_INT16 -#define MCA_GPR_OOB_PACK_ACTION OMPI_INT16 -#define 
MCA_GPR_OOB_PACK_MODE OMPI_INT16 -#define MCA_GPR_OOB_PACK_OBJECT_SIZE OMPI_INT32 -#define MCA_GPR_OOB_PACK_SYNCHRO_MODE OMPI_INT16 - +#define MCA_GPR_OOB_PACK_CMD OMPI_INT16 +#define MCA_GPR_OOB_PACK_ACTION OMPI_INT16 +#define MCA_GPR_OOB_PACK_MODE OMPI_INT16 +#define MCA_GPR_OOB_PACK_OBJECT_SIZE OMPI_INT32 +#define MCA_GPR_OOB_PACK_SYNCHRO_MODE OMPI_INT8 +#define MCA_GPR_OOB_PACK_NOTIFY_ID OMPI_INT32 +#define MCA_GPR_OOB_PACK_BOOL OMPI_INT8 +#define MCA_GPR_OOB_PACK_STATUS_KEY OMPI_INT8 +#define MCA_GPR_OOB_PACK_EXIT_CODE OMPI_INT8 +#define MCA_GPR_OOB_PACK_JOBID OMPI_JOBID +#define MCA_GPR_OOB_PACK_NAME OMPI_NAME /* * typedefs @@ -168,6 +197,23 @@ typedef struct ompi_registry_index_value_t ompi_registry_index_value_t; OMPI_DECLSPEC OBJ_CLASS_DECLARATION(ompi_registry_index_value_t); +/** Return value structure for compound registry commands. + * A compound registry command contains multiple registry commands, all transferred + * in a single communication. Because of this, data returned by the individual + * commands within the compound command must be separated out so it can be clearly + * retrieved. This structure provides a wrapper for data returned by each of the + * individual commands. 
+ */ +struct ompi_registry_compound_cmd_results_t { + ompi_list_item_t item; /**< Allows this item to be placed on a list */ + int32_t status_code; /**< Status code resulting from the command */ + ompi_list_t data; /**< Any returned data coming from the command */ +}; +typedef struct ompi_registry_compound_cmd_results_t ompi_registry_compound_cmd_results_t; + +OBJ_CLASS_DECLARATION(ompi_registry_compound_cmd_results_t); + + /** Return value for test results on internal test */ struct ompi_registry_internal_test_results_t { @@ -180,58 +226,591 @@ typedef struct ompi_registry_internal_test_results_t ompi_registry_internal_test OMPI_DECLSPEC OBJ_CLASS_DECLARATION(ompi_registry_internal_test_results_t); -struct mca_gpr_notify_request_tracker_t { - ompi_list_item_t item; - ompi_process_name_t *requestor; - int req_tag; - ompi_registry_notify_cb_fn_t callback; - void *user_tag; - mca_gpr_notify_id_t id_tag; -}; -typedef struct mca_gpr_notify_request_tracker_t mca_gpr_notify_request_tracker_t; - -OMPI_DECLSPEC OBJ_CLASS_DECLARATION(mca_gpr_notify_request_tracker_t); - - struct mca_gpr_idtag_list_t { ompi_list_item_t item; - mca_gpr_notify_id_t id_tag; + ompi_registry_notify_id_t id_tag; }; typedef struct mca_gpr_idtag_list_t mca_gpr_idtag_list_t; OMPI_DECLSPEC OBJ_CLASS_DECLARATION(mca_gpr_idtag_list_t); + /* * Component functions that MUST be provided */ + +/* + * Begin recording a compound command. + * Normally, the registry executes each command as it is called. This, however, can result + * in an undesirable amount of network traffic. To reduce the traffic, this command allows + * the user to aggregate a set of registry commands - in any combination of put, get, index, + * or any other command - to be executed via a single communication to the registry. + * + * While recording, all registry commands are stored in a buffer instead of being immediately + * executed. Thus, commands that retrieve information (e.g., "get") will return a NULL + * during recording. 
Values from these commands will be returned when the compound + * command is actually executed. + * + * The process of recording a compound command is thread safe. Threads attempting to + * record commands are held on a lock until given access in their turn. + * + * @param None + * @retval OMPI_SUCCESS Compound command recorder is active. + * @retval OMPI_ERROR Compound command recorder did not activate. + * + * @code + * ompi_registry.begin_compound_cmd(); + * @endcode + * + */ +typedef int (*mca_gpr_base_module_begin_compound_cmd_fn_t)(void); + +/* + * Stop recording a compound command + * Terminates the recording process and clears the buffer of any previous commands + * + * @param None + * @retval OMPI_SUCCESS Recording stopped and buffer successfully cleared + * @retval OMPI_ERROR Recording could not be stopped or the buffer could not be cleared + * + * @code + * ompi_registry.stop_compound_cmd(); + * @endcode + * + */ +typedef int (*mca_gpr_base_module_stop_compound_cmd_fn_t)(void); + +/* + * Execute the compound command + * Execute the compound command that has been recorded. Any output from each command + * is captured in a list that can be returned to the caller, depending upon the + * value of the input parameter. + * + * @param OMPI_REGISTRY_RETURN_REQUESTED Data and status codes returned by commands in + * the recorded compound command are to be returned in a list of ompi_registry_compound_cmd_results_t + * structures. + * @param OMPI_REGISTRY_NO_RETURN_REQUESTED Data and status codes returned by commands + * in the recorded compound command are to be discarded. + * + * @retval return_values A list of ompi_registry_compound_cmd_results_t structures that + * contain the results from each command (in sequence they were issued) of the compound command. + * @retval NULL No values returned.
+ * + * @code + * return_values = ompi_registry.exec_compound_cmd(OMPI_REGISTRY_RETURN_REQUESTED); + * + * ompi_registry.exec_compound_cmd(OMPI_REGISTRY_NO_RETURN_REQUESTED); + * @endcode + * + */ +typedef ompi_list_t* (*mca_gpr_base_module_exec_compound_cmd_fn_t)(bool return_requested); + +/* + * Turn return of status codes OFF. + * All registry functions normally return a status code, with the exception of those + * functions that return data values. This function sets a flag that turns OFF the + * status code returns. Normally used to reduce network traffic by eliminating the + * return of status codes. Commands will automatically return a default value of OMPI_SUCCESS. + * + * @param None + * @retval None + * + * @code + * ompi_registry.silent_mode_on(); + * @endcode + */ +typedef void (*mca_gpr_base_module_silent_mode_on_fn_t)(void); + +/* Turn return of status codes ON. + * All registry functions normally return a status code, with the exception of those + * functions that return data values. This function sets a flag that turns ON the + * status code returns (i.e., restores the default condition). + * + * @param None + * @retval None + * + * @code + * ompi_registry.silent_mode_off(); + * @endcode + */ +typedef void (*mca_gpr_base_module_silent_mode_off_fn_t)(void); + +/* Turn off subscriptions for this process + * Temporarily turn off subscriptions for this process on the registry. Until restored, + * the specified subscription will be ignored - no message will be sent. Providing a + * value of OMPI_REGISTRY_NOTIFY_ID_MAX for the subscription number will turn off ALL + * subscriptions with this process as the subscriber. + * + * Note: synchro messages will continue to be sent - only messages from subscriptions + * are affected. + * + * @param sub_number Notify id number of the subscription to be turned "off". 
A value + * of OMPI_REGISTRY_NOTIFY_ID_MAX indicates that ALL subscriptions with this process as the subscriber are to be + * turned "off" until further notice. + * + * @code + * ompi_registry.notify_off(subscription_number); + * @endcode + */ +typedef void (*mca_gpr_base_module_notify_off_fn_t)(ompi_registry_notify_id_t sub_number); + +/* Turn on subscriptions for this process + * Turn on subscriptions for this process on the registry. This is the default condition + * for subscriptions, indicating that messages generated by triggered subscriptions are to + * be sent to the subscribing process. + * + * @param sub_number Notify id number of the subscription to be turned "on". A value + * of OMPI_REGISTRY_NOTIFY_ID_MAX indicates that ALL subscriptions with this process as the subscriber are to be + * turned "on" until further notice. + * + * @code + * ompi_registry.notify_on(subscription_number); + * @endcode + */ +typedef void (*mca_gpr_base_module_notify_on_fn_t)(ompi_registry_notify_id_t sub_number); + +/* Turn triggers on for this jobid + * Activate all triggers for this jobid on the registry. Does not counteract the subscription on/off + * for each process. When created, segments default to triggers being INACTIVE. All + * subscriptions and synchros, therefore, are rendered inactive until the segment's + * triggers are turned "on". + * + * @param jobid The jobid whose triggers are to be activated. + * + * @code + * ompi_registry.triggers_active(jobid); + * @endcode + */ +typedef void (*mca_gpr_base_module_triggers_active_fn_t)(mca_ns_base_jobid_t jobid); + +/* Turn triggers off for this jobid. + * Deactivate all triggers for the specified job. All subscriptions and synchros will be + * rendered inactive regardless of recipients and/or conditions. + * + * @param jobid The jobid whose triggers are to be + * deactivated. 
+ * + * @code + * ompi_registry.triggers_inactive(jobid); + * @endcode + */ +typedef void (*mca_gpr_base_module_triggers_inactive_fn_t)(mca_ns_base_jobid_t jobid); + +/* + * Get the job startup message. + * At the startup of any job, there is a set of information that needs to be sent to every + * process - this is known as the job startup message. This function provides an entry point + * for the controlling process (i.e., the one that is spawning the application - usually + * mpirun) to obtain the job startup message so it can subsequently "broadcast" it to all + * of the application's processes. + * + * @param jobid The id of the job being started. + * + * @param recipients A list of process names for the recipients - the input parameter + * is a pointer to the list; the function returns the list in that location. + * + * @retval msg A packed buffer containing all the information required. This + * information is obtained by gathering all data on all segments "owned" by the specified + * jobid. The registry has NO knowledge of what is in the data elements, where it should go, + * etc. The data from each segment is preceded by the name of the segment from which it came. + * A function for parsing this message and distributing the data is provided elsewhere - such + * functionality is beyond the purview of the registry. + * + * @code + * msg_buffer = ompi_registry.get_startup_msg(jobid, recipients); + * @endcode + * + */ +typedef ompi_buffer_t (*mca_gpr_base_module_get_startup_msg_fn_t)(mca_ns_base_jobid_t jobid, + ompi_list_t *recipients); + +/* + * Get the job shutdown message. + * Upon completing, each process waits for a final synchronizing message to arrive. This ensures + * that all processes exit together and prevents, for example, "hangs" as one process tries to talk + * to another that has completed. Not much data should need to be shared during this operation, but + * this function provides an entry point in case something is identified.
+ * + * @param jobid The id of the job being shut down. + * @param recipients A list of process names for the recipients - the input parameter + * is a pointer to the list; the function returns the list in that location. + * + * @retval msg A packed buffer containing the required information. At the moment, this will be an + * empty buffer as no information has yet been identified. + * + * @code + * msg_buffer = ompi_registry.get_shutdown_msg(jobid, recipients); + * @endcode + * + */ +typedef ompi_buffer_t (*mca_gpr_base_module_get_shutdown_msg_fn_t)(mca_ns_base_jobid_t jobid, + ompi_list_t *recipients); + +/* Cleanup a job from the registry + * Remove all references to a given job from the registry. This includes removing + * all segments "owned" by the job, and removing all process names from dictionaries + * in the registry. + * + * @param jobid The jobid to be cleaned up. + * + * @code + * ompi_registry.cleanup_job(jobid); + * @endcode + * + */ +typedef void (*mca_gpr_base_module_cleanup_job_fn_t)(mca_ns_base_jobid_t jobid); + +/* Cleanup a process from the registry + * Remove all references to a given process from the registry. This includes removing + * the process name from all dictionaries in the registry, all subscriptions, etc. + * It also includes reducing any synchros on segments owned by the associated job. + * + * @param proc A pointer to the process name to be cleaned up. + * + * @code + * ompi_registry.cleanup_process(purge, &proc); + * @endcode + * + */ +typedef void (*mca_gpr_base_module_cleanup_proc_fn_t)(bool purge, ompi_process_name_t *proc); + +/* + * Delete a segment from the registry + * This command removes an entire segment from the registry, including all data objects, + * associated subscriptions, and synchros. This is a non-reversible process, so it should + * be used with care. + * + * @param segment Character string specifying the name of the segment to be removed. + * + * @retval OMPI_SUCCESS Segment successfully removed.
+ * @retval OMPI_ERROR Segment could not be removed for some reason - most + * likely, the segment name provided was not found in the registry. + * + * @code + * status_code = ompi_registry.delete_segment(segment); + * @endcode + */ typedef int (*mca_gpr_base_module_delete_segment_fn_t)(char *segment); + +/* + * Put a data object on the registry + * + * @param mode The addressing mode to be used. Addresses are defined by the tokens provided + * that describe the object being stored. The caller has the option of specifying how + * those tokens are to be combined in describing the object. Passing a value of + * "OMPI_REGISTRY_AND", for example, indicates that all provided tokens are to be used. + * In contrast, a value of "OMPI_REGISTRY_OR" indicates that any of the provided tokens + * can adequately describe the object. For the "put" command, only "OMPI_REGISTRY_XAND" + * is accepted - in other words, the tokens must exactly match those of any existing + * object in order for the object to be updated. In addition, the "OMPI_REGISTRY_OVERWRITE" + * flag must be or'd into the mode to enable update of the data object. If a data object + * is found with the identical token description, but OMPI_REGISTRY_OVERWRITE is NOT specified, + * then an error will be generated - the data object will NOT be overwritten in this + * situation. + * + * Upon completing the "put", all subscription and synchro requests registered on the + * specified segment are checked and appropriately processed. + * + * @param segment A character string specifying the name of the segment upon which + * the data object is to be placed. + * + * @param tokens A **char list of tokens describing the object. + * + * @param object An ompi_registry_object_t data object that is to be placed + * on the registry. The registry will copy this data object onto the specified segment - the + * calling program is responsible for freeing any memory, if appropriate. 
+ * + * @param size An ompi_registry_object_size_t value indicating the size of the data + * object in bytes. + * + * @retval OMPI_SUCCESS The data object has been stored on the specified segment, or the + * corresponding existing data object has been updated. + * + * @retval OMPI_ERROR The data object was not stored on the specified segment, or the + * corresponding existing data object was not found, or the object was found but the overwrite + * flag was not set. + * + * @code + * status_code = ompi_registry.put(mode, segment, tokens, object, object_size); + * @endcode + */ typedef int (*mca_gpr_base_module_put_fn_t)(ompi_registry_mode_t mode, char *segment, - char **tokens, ompi_registry_object_t object, - ompi_registry_object_size_t size); + char **tokens, ompi_registry_object_t object, + ompi_registry_object_size_t size); + +/* + * Get data from the registry. + * Returns data from the registry. Given an addressing mode, segment name, and a set + * of tokens describing the data object, the "get" function will search the specified + * registry segment and return all data items that "match" the description. Addressing + * modes specify how the provided tokens are to be combined to determine the match - + * a value of "OMPI_REGISTRY_AND", for example, indicates that all the tokens must be + * included in the object's description, but allows for other tokens to also be present. + * A value of "OMPI_REGISTRY_XAND", in contrast, requires that all the tokens be present, + * and that ONLY those tokens be present. + * + * The data is returned as a list of ompi_registry_value_t objects. The caller is + * responsible for freeing this data storage. Only copies of the registry data are + * returned - thus, any actions taken by the caller will NOT impact data stored on the + * registry. + * + * @param addr_mode The addressing mode to be used in the search. + * @param segment A character string indicating the name of the segment to be searched.
+ * @param tokens A NULL-terminated **char list of tokens describing the objects to be + * returned. A value of NULL indicates that ALL data on the segment is to be returned. + * + * @retval data_list A list of ompi_registry_value_t objects containing the data objects + * returned by the specified search. + * + * @code + * data_list = ompi_registry.get(mode, segment, tokens); + * @endcode + */ typedef ompi_list_t* (*mca_gpr_base_module_get_fn_t)(ompi_registry_mode_t addr_mode, char *segment, char **tokens); -typedef int (*mca_gpr_base_module_delete_fn_t)(ompi_registry_mode_t addr_mode, - char *segment, char **tokens); + +/* + * Delete an object from the registry + * Remove an object from the registry. Given an addressing mode, segment name, and a set + * of tokens describing the data object, the function will search the specified + * registry segment and delete all data items that "match" the description. Addressing + * modes specify how the provided tokens are to be combined to determine the match - + * a value of "OMPI_REGISTRY_AND", for example, indicates that all the tokens must be + * included in the object's description, but allows for other tokens to also be present. + * A value of "OMPI_REGISTRY_XAND", in contrast, requires that all the tokens be present, + * and that ONLY those tokens be present. + * + * Note: A value of NULL for the tokens will delete ALL data items from the specified + * segment. + * + * @param addr_mode The addressing mode to be used in the search. + * @param segment A character string indicating the name of the segment to be searched. + * @param tokens A NULL-terminated **char list of tokens describing the objects to be + * removed. A value of NULL indicates that ALL data on the segment is to be removed.
+ * + * @code + * status_code = ompi_registry.delete_object(mode, segment, tokens); + * @endcode + */ +typedef int (*mca_gpr_base_module_delete_object_fn_t)(ompi_registry_mode_t addr_mode, + char *segment, char **tokens); + +/* + * Obtain an index of a specified dictionary + * The registry contains a dictionary at the global level (containing names of all the + * segments) and a dictionary for each segment (containing the names of all tokens used + * in that segment). This command allows the caller to obtain a list of all entries + * in the specified dictionary. + * + * @param segment A character string indicating the segment whose dictionary is to be + * indexed. A value of NULL indicates that the global level dictionary is to be used. + * + * @retval index_list A list of ompi_registry_index_value_t objects containing the + * dictionary entries. A list of zero length is returned if the specified segment + * cannot be found, or if the specified dictionary is empty. + * + * @code + * index_list = ompi_registry.index(segment); + * @endcode + */ typedef ompi_list_t* (*mca_gpr_base_module_index_fn_t)(char *segment); -typedef int (*mca_gpr_base_module_subscribe_fn_t)(ompi_registry_mode_t addr_mode, + +/* + * Subscribe to be notified upon a specified action + * The registry includes a publish/subscribe mechanism by which callers can be notified + * upon certain actions occurring to data objects stored on the registry. This function + * allows the caller to register for such notifications. The registry allows a subscription + * to be placed upon any segment, and upon the entire registry if desired. + * + * @param addr_mode The addressing mode to be used in specifying the objects to be + * monitored by this subscription. + * @param action The actions which are to trigger a notification message. These can + * be OR'd together from the defined registry action flags.
+ * @param segment A character string indicating the name of the segment upon which the + * subscription is being requested. A value of NULL indicates that the subscription + * is to be placed on the entire registry - this should be done with caution as the + * subscription will trigger on ALL registry events matching the specified action and + * addressing, potentially including those from jobs other than the one generating the + * subscription request. + * @param tokens A NULL-terminated **char list of tokens describing the objects to be + * monitored. A value of NULL indicates that ALL data on the segment is to be monitored. + * @param cb_func The ompi_registry_notify_cb_fn_t callback function to be called when + * a subscription is triggered. The data from each monitored object will be returned + * to the callback function in an ompi_registry_notify_message_t structure. + * @param user_tag A void* user-provided storage location that the caller can + * use for its own purposes. A NULL value is acceptable. + * + * @retval sub_number The subscription number of this request. Callers should save this + * number for later use if (for example) it is desired to temporarily turn "off" the subscription + * or to permanently remove the subscription from the registry. + * + * @code + * sub_number = ompi_registry.subscribe(addr_mode, action, segment, tokens, cb_func, user_tag); + * @endcode + */ +typedef ompi_registry_notify_id_t (*mca_gpr_base_module_subscribe_fn_t)(ompi_registry_mode_t addr_mode, ompi_registry_notify_action_t action, char *segment, char **tokens, ompi_registry_notify_cb_fn_t cb_func, void *user_tag); -typedef int (*mca_gpr_base_module_unsubscribe_fn_t)(ompi_registry_mode_t addr_mode, - ompi_registry_notify_action_t action, - char *segment, char **tokens); -typedef int (*mca_gpr_base_module_synchro_fn_t)(ompi_registry_synchro_mode_t synchro_mode, + +/* + * Cancel a subscription. 
+ * Once a subscription has been entered on the registry, a caller may choose to permanently + * remove it at a later time. This function supports that request. + * + * @param sub_number The ompi_registry_notify_id_t value returned by the original subscribe + * command. + * + * @retval OMPI_SUCCESS The subscription was removed. + * @retval OMPI_ERROR The subscription could not be removed - most likely caused by specifying + * a non-existent (or previously removed) subscription number. + * + * @code + * status_code = ompi_registry.unsubscribe(sub_number); + * @endcode + */ +typedef int (*mca_gpr_base_module_unsubscribe_fn_t)(ompi_registry_notify_id_t sub_number); + +/* + * Request a synchro call from the registry + * Subscriptions indicate when a specified action has occurred on one or more data objects. + * In some conditions, however, it is desirable to simply know when a specified number of + * data objects is present on a given registry segment. For example, since each process must + * register its contact information on the registry, knowing when the number of registrations + * equals the number of processes can serve as an indicator that all processes are ready to run. + * + * This function allows the caller to request notification of data object count meeting + * specified criteria on the indicated registry segment. Supported counting modes include + * "edge-triggered" (i.e., ascending or descending through a specified level) and "level" + * (the count being equal to, above, or below a specified value). + * + * Any objects already on the specified segment prior to issuing the synchro request + * will be counted when the request is registered on the registry. + * + * Upon triggering, the synchro returns all data objects included in the count in the + * notification message. + * + * @param addr_mode The addressing mode to be used in specifying the objects to be + * counted by this synchro.
+ * @param segment A character string indicating the name of the segment upon which the + * synchro is being requested. A value of NULL indicates that the synchro + * is to be placed on the entire registry - this should be done with caution as the + * synchro will fire based on counting ALL registry objects matching the specified + * addressing, potentially including those from jobs other than the one generating the + * synchro request. + * @param tokens A NULL-terminated **char list of tokens describing the objects to be + * counted. A value of NULL indicates that ALL objects on the segment are to be counted. + * @param cb_func The ompi_registry_notify_cb_fn_t callback function to be called when + * the synchro is triggered. The data from each counted object will be returned + * to the callback function in an ompi_registry_notify_message_t structure. + * @param user_tag A void* user-provided storage location that the caller can + * use for its own purposes. A NULL value is acceptable. + * + * @retval synch_number The synchro number of this request. Callers should save this + * number for later use if it is desired to permanently remove the synchro from the registry. + * Note: ONE_SHOT synchros are automatically removed from the registry when triggered. 
+ * + * @code + * synch_number = ompi_registry.synchro(synch_mode, addr_mode, segment, tokens, trigger, + * cb_func, user_tag); + * @endcode + */ +typedef ompi_registry_notify_id_t (*mca_gpr_base_module_synchro_fn_t)(ompi_registry_synchro_mode_t synchro_mode, ompi_registry_mode_t addr_mode, char *segment, char **tokens, int trigger, ompi_registry_notify_cb_fn_t cb_func, void *user_tag); -typedef int (*mca_gpr_base_module_cancel_synchro_fn_t)(ompi_registry_synchro_mode_t synchro_mode, - ompi_registry_mode_t addr_mode, - char *segment, char **tokens, int trigger); -typedef int (*mca_gpr_base_module_register_fn_t)(char *contact_info, size_t num_procs, - ompi_registry_notify_cb_fn_t start_cb_func, void *start_user_tag, - ompi_registry_notify_cb_fn_t end_cb_func, void *end_user_tag); -typedef int (*mca_gpr_base_module_unregister_fn_t)(char *proc_name_string); +/* + * Cancel a synchro. + * Once a synchro has been entered on the registry, a caller may choose to + * remove it at a later time. This function supports that request. + * + * Note: ONE_SHOT synchros are automatically removed from the registry when triggered. + * + * @param synch_number The ompi_registry_notify_id_t value returned by the original synchro + * command. + * + * @retval OMPI_SUCCESS The synchro was removed. + * @retval OMPI_ERROR The synchro could not be removed - most likely caused by specifying + * a non-existent (or previously removed) synchro number. + * + * @code + * status_code = ompi_registry.cancel_synchro(synch_number); + * @endcode + */ +typedef int (*mca_gpr_base_module_cancel_synchro_fn_t)(ompi_registry_notify_id_t synch_number); + +/* Output the registry's contents to an output stream + * For debugging purposes, it is helpful to be able to obtain a complete formatted printout + * of the registry's contents. This function provides that ability. + * + * @param output_id The output stream id to which the registry's contents are to be + * printed. 
+ * + * @retval None + * + * @code + * ompi_registry.dump(output_id); + * @endcode + */ +typedef void (*mca_gpr_base_module_dump_fn_t)(int output_id); + +/* Assume ownership of a segment. + * Although each segment of the registry can contain data from anywhere, each segment is "owned" + * by a specific job at any given time. This allows the registry to associate a segment with a jobid, + * thus enabling support for startup and shutdown processes. Transferring ownership of registry + * segments can occur when the segment is shared by multiple jobs, one or more of which subsequently + * terminate. In this case, another job must assume "ownership" of the segment. + * + * @param segment A character string indicating the segment for which this process is + * assuming ownership. + * + * @retval OMPI_SUCCESS Ownership successfully transferred. + * @retval OMPI_ERROR Ownership could not be transferred, most likely due to specifying a non-existing + * segment (or one that has been previously removed). + * + * @code + * status_code = ompi_registry.assume_ownership(segment); + * @endcode + */ +typedef int (*mca_gpr_base_module_assume_ownership_fn_t)(char *segment); + +/* Deliver a notify message. + * The registry generates notify messages whenever a subscription or synchro is fired. Normally, + * this happens completely "under the covers" - i.e., the notification process is transparent + * to the rest of the system, with the message simply delivered to the specified callback function. + * However, there are two circumstances when the system needs to explicitly deliver a notify + * message - namely, during startup and shutdown. In these two cases, a special message is + * "xcast" to all processes, with each process receiving the identical message. In order to + * ensure that the correct data gets to each subsystem, the message must be disassembled and + * the appropriate callback function called. 
+ * + * This, unfortunately, means that the decoder must explicitly call the message notification + * subsystem in order to find the callback function. Alternatively, the entire startup/shutdown + * logic could be buried in the registry, but this violates the design philosophy of the registry + * acting solely as a publish/subscribe-based cache memory - it should not contain logic pertinent + * to any usage of that memory. + * + * This function provides the necessary "hook" for an external program to request delivery of + * a message via the publish/subscribe's notify mechanism. + * + * @param state The notify action associated with the message. In this case, only two values are + * supported: OMPI_REGISTRY_NOTIFY_ON_STARTUP and OMPI_REGISTRY_NOTIFY_ON_SHUTDOWN. The function + * will search the notification system for all requests that match this state and also match + * the segment name specified in the message itself. Each of the matching requests will be + * called with the message. + * + * @param message The message to be delivered. 
+ * + * @retval None + * + * @code + * ompi_registry.deliver_notify_msg(state, message); + * @endcode + * + */ +typedef void (*mca_gpr_base_module_deliver_notify_msg_fn_t)(ompi_registry_notify_action_t state, + ompi_registry_notify_message_t *message); /* * test interface for internal functions - optional to provide */ @@ -249,11 +828,25 @@ struct mca_gpr_base_module_1_0_0_t { mca_gpr_base_module_unsubscribe_fn_t unsubscribe; mca_gpr_base_module_synchro_fn_t synchro; mca_gpr_base_module_cancel_synchro_fn_t cancel_synchro; - mca_gpr_base_module_delete_fn_t delete_object; + mca_gpr_base_module_delete_object_fn_t delete_object; mca_gpr_base_module_index_fn_t index; mca_gpr_base_module_test_internals_fn_t test_internals; - mca_gpr_base_module_register_fn_t rte_register; - mca_gpr_base_module_unregister_fn_t rte_unregister; + mca_gpr_base_module_begin_compound_cmd_fn_t begin_compound_cmd; + mca_gpr_base_module_stop_compound_cmd_fn_t stop_compound_cmd; + mca_gpr_base_module_exec_compound_cmd_fn_t exec_compound_cmd; + mca_gpr_base_module_dump_fn_t dump; + mca_gpr_base_module_silent_mode_on_fn_t silent_mode_on; + mca_gpr_base_module_silent_mode_off_fn_t silent_mode_off; + mca_gpr_base_module_notify_off_fn_t notify_off; + mca_gpr_base_module_notify_on_fn_t notify_on; + mca_gpr_base_module_assume_ownership_fn_t assume_ownership; + mca_gpr_base_module_triggers_active_fn_t triggers_active; + mca_gpr_base_module_triggers_inactive_fn_t triggers_inactive; + mca_gpr_base_module_get_startup_msg_fn_t get_startup_msg; + mca_gpr_base_module_get_shutdown_msg_fn_t get_shutdown_msg; + mca_gpr_base_module_cleanup_job_fn_t cleanup_job; + mca_gpr_base_module_cleanup_proc_fn_t cleanup_process; + mca_gpr_base_module_deliver_notify_msg_fn_t deliver_notify_msg; }; typedef struct mca_gpr_base_module_1_0_0_t mca_gpr_base_module_1_0_0_t; typedef mca_gpr_base_module_1_0_0_t mca_gpr_base_module_t; @@ -263,9 +856,9 @@ typedef mca_gpr_base_module_1_0_0_t mca_gpr_base_module_t; */ typedef 
mca_gpr_base_module_t* (*mca_gpr_base_component_init_fn_t)( - bool *allow_multi_user_threads, - bool *have_hidden_threads, - int *priority); + bool *allow_multi_user_threads, + bool *have_hidden_threads, + int *priority); typedef int (*mca_gpr_base_component_finalize_fn_t)(void); @@ -275,11 +868,11 @@ typedef int (*mca_gpr_base_component_finalize_fn_t)(void); struct mca_gpr_base_component_1_0_0_t { - mca_base_component_t gpr_version; - mca_base_component_data_1_0_0_t gpr_data; + mca_base_component_t gpr_version; + mca_base_component_data_1_0_0_t gpr_data; - mca_gpr_base_component_init_fn_t gpr_init; - mca_gpr_base_component_finalize_fn_t gpr_finalize; + mca_gpr_base_component_init_fn_t gpr_init; + mca_gpr_base_component_finalize_fn_t gpr_finalize; }; typedef struct mca_gpr_base_component_1_0_0_t mca_gpr_base_component_1_0_0_t; typedef mca_gpr_base_component_1_0_0_t mca_gpr_base_component_t; @@ -291,10 +884,10 @@ typedef mca_gpr_base_component_1_0_0_t mca_gpr_base_component_t; /* * Macro for use in modules that are of type coll v1.0.0 */ -#define MCA_GPR_BASE_VERSION_1_0_0 \ - /* gpr v1.0 is chained to MCA v1.0 */ \ - MCA_BASE_VERSION_1_0_0, \ - /* gpr v1.0 */ \ - "gpr", 1, 0, 0 +#define MCA_GPR_BASE_VERSION_1_0_0 \ + /* gpr v1.0 is chained to MCA v1.0 */ \ + MCA_BASE_VERSION_1_0_0, \ + /* gpr v1.0 */ \ + "gpr", 1, 0, 0 #endif diff --git a/src/mca/gpr/proxy/Makefile.am b/src/mca/gpr/proxy/Makefile.am index 895f323281..ef37e5ca79 100644 --- a/src/mca/gpr/proxy/Makefile.am +++ b/src/mca/gpr/proxy/Makefile.am @@ -7,9 +7,19 @@ include $(top_ompi_srcdir)/config/Makefile.options sources = \ - gpr_proxy.c \ - gpr_proxy.h \ - gpr_proxy_component.c + gpr_proxy_component.c \ + gpr_proxy_compound_cmd.c \ + gpr_proxy_del_index.c \ + gpr_proxy_cleanup.c \ + gpr_proxy_dump.c \ + gpr_proxy_messaging.c \ + gpr_proxy_internals.c \ + gpr_proxy_put_get.c \ + gpr_proxy_xmit_alerts.c \ + gpr_proxy_mode_ops.c \ + gpr_proxy_subscribe.c \ + gpr_proxy_synchro.c \ + gpr_proxy.h # Make the 
output library in this directory, and name it either # mca__.la (for DSO builds) or libmca__.la diff --git a/src/mca/gpr/proxy/gpr_proxy.c b/src/mca/gpr/proxy/gpr_proxy.c deleted file mode 100644 index 96c89a48d7..0000000000 --- a/src/mca/gpr/proxy/gpr_proxy.c +++ /dev/null @@ -1,1179 +0,0 @@ -/* - * $HEADER$ - */ -/** @file: - * - */ - -#include "ompi_config.h" - -#include - -#include "threads/mutex.h" - -#include "util/output.h" -#include "util/proc_info.h" -#include "util/sys_info.h" - -#include "mca/mca.h" -#include "mca/gpr/base/base.h" -#include "gpr_proxy.h" -#include "runtime/runtime.h" - -/** - * globals - */ - -/* - * Implemented registry functions - */ - - -int gpr_proxy_delete_segment(char *segment) -{ - ompi_buffer_t cmd; - ompi_buffer_t answer; - mca_gpr_cmd_flag_t command; - int recv_tag; - int32_t response; - - command = MCA_GPR_DELETE_SEGMENT_CMD; - recv_tag = MCA_OOB_TAG_GPR; - - if (OMPI_SUCCESS != ompi_buffer_init(&cmd, 0)) { /* got a problem */ - return OMPI_ERROR; - } - - if (OMPI_SUCCESS != ompi_pack(cmd, &command, 1, MCA_GPR_OOB_PACK_CMD)) { - return OMPI_ERROR; - } - - if (OMPI_SUCCESS != ompi_pack_string(cmd, segment)) { - return OMPI_ERROR; - } - - if (0 > mca_oob_send_packed(mca_gpr_my_replica, cmd, MCA_OOB_TAG_GPR, 0)) { - return OMPI_ERROR; - } - - - if (0 > mca_oob_recv_packed(mca_gpr_my_replica, &answer, &recv_tag)) { - return OMPI_ERROR; - } - - if ((OMPI_SUCCESS != ompi_unpack(answer, &command, 1, MCA_GPR_OOB_PACK_CMD)) - || (MCA_GPR_DELETE_SEGMENT_CMD != command)) { - ompi_buffer_free(answer); - return OMPI_ERROR; - } - - if (OMPI_SUCCESS != ompi_unpack(answer, &response, 1, OMPI_INT32)) { - ompi_buffer_free(answer); - return OMPI_ERROR; - } else { - ompi_buffer_free(answer); - return (int)response; - } - return OMPI_ERROR; -} - - -int gpr_proxy_put(ompi_registry_mode_t mode, char *segment, - char **tokens, ompi_registry_object_t object, - ompi_registry_object_size_t size) -{ - ompi_buffer_t cmd; - ompi_buffer_t answer; - 
mca_gpr_cmd_flag_t command; - char **tokptr; - int recv_tag, i, ret; - int32_t num_tokens, object_size; - int32_t response; - - if (mca_gpr_proxy_debug) { - ompi_output(0, "gpr_proxy_put: entered for segment %s 1st token %s", segment, *tokens); - } - - command = MCA_GPR_PUT_CMD; - recv_tag = MCA_OOB_TAG_GPR; - - if (OMPI_SUCCESS != ompi_buffer_init(&cmd, 0)) { /* got a problem */ - return OMPI_ERROR; - } - - if (OMPI_SUCCESS != ompi_pack(cmd, &command, 1, MCA_GPR_OOB_PACK_CMD)) { - return OMPI_ERROR; - } - - if (OMPI_SUCCESS != ompi_pack(cmd, &mode, 1, MCA_GPR_OOB_PACK_MODE)) { - return OMPI_ERROR; - } - - if (OMPI_SUCCESS != ompi_pack_string(cmd, segment)) { - return OMPI_ERROR; - } - - /* compute number of tokens */ - tokptr = tokens; - num_tokens = 0; - while (NULL != *tokptr) { - num_tokens++; - tokptr++; - } - - if (OMPI_SUCCESS != ompi_pack(cmd, &num_tokens, 1, OMPI_INT32)) { - return OMPI_ERROR; - } - - tokptr = tokens; - for (i=0; icellid, - ompi_rte_get_self()->jobid, ompi_rte_get_self()->vpid); - if (NULL == mca_gpr_my_replica) { - ompi_output(0, "\tBAD REPLICA"); - } - } - - if (0 > (ret = mca_oob_send_packed(mca_gpr_my_replica, cmd, MCA_OOB_TAG_GPR, 0))) { - if (mca_gpr_proxy_debug) { - ompi_output(0, "gpr_proxy_put: send failed with return %d", ret); - } - return OMPI_ERROR; - } - - if (mca_gpr_proxy_debug) { - ompi_output(0, "[%d,%d,%d] gpr_proxy_put: send complete", ompi_rte_get_self()->cellid, - ompi_rte_get_self()->jobid, ompi_rte_get_self()->vpid); - } - - if (0 > mca_oob_recv_packed(mca_gpr_my_replica, &answer, &recv_tag)) { - return OMPI_ERROR; - } - - if ((OMPI_SUCCESS != ompi_unpack(answer, &command, 1, MCA_GPR_OOB_PACK_CMD)) - || (MCA_GPR_PUT_CMD != command)) { - ompi_buffer_free(answer); - return OMPI_ERROR; - } - - if (OMPI_SUCCESS != ompi_unpack(answer, &response, 1, OMPI_INT32)) { - ompi_buffer_free(answer); - return OMPI_ERROR; - } else { - ompi_buffer_free(answer); - return (int)response; - } - return OMPI_ERROR; -} - - -int 
gpr_proxy_delete_object(ompi_registry_mode_t mode, - char *segment, char **tokens) -{ - ompi_buffer_t cmd; - ompi_buffer_t answer; - mca_gpr_cmd_flag_t command; - char **tokptr; - int recv_tag, i; - int32_t num_tokens, response; - - if (mca_gpr_proxy_debug) { - ompi_output(0, "[%d,%d,%d] gpr_proxy_delete_object", - ompi_rte_get_self()->cellid, ompi_rte_get_self()->jobid, ompi_rte_get_self()->vpid); - } - - /* need to protect against errors */ - if (NULL == segment || NULL == tokens || NULL == *tokens) { - return OMPI_ERROR; - } - - command = MCA_GPR_DELETE_OBJECT_CMD; - recv_tag = MCA_OOB_TAG_GPR; - - if (OMPI_SUCCESS != ompi_buffer_init(&cmd, 0)) { /* got a problem */ - return OMPI_ERROR; - } - - if (OMPI_SUCCESS != ompi_pack(cmd, &command, 1, MCA_GPR_OOB_PACK_CMD)) { - goto CLEANUP; - } - - if (OMPI_SUCCESS != ompi_pack(cmd, &mode, 1, MCA_GPR_OOB_PACK_MODE)) { - goto CLEANUP; - } - - if (OMPI_SUCCESS != ompi_pack_string(cmd, segment)) { - goto CLEANUP; - } - - /* compute number of tokens */ - tokptr = tokens; - num_tokens = 0; - while (NULL != *tokptr) { - num_tokens++; - tokptr++; - } - - if (OMPI_SUCCESS != ompi_pack(cmd, &num_tokens, 1, OMPI_INT32)) { - goto CLEANUP; - } - - tokptr = tokens; - for (i=0; icellid, ompi_rte_get_self()->jobid, ompi_rte_get_self()->vpid); - } - - if (0 > mca_oob_send_packed(mca_gpr_my_replica, cmd, MCA_OOB_TAG_GPR, 0)) { - goto CLEANUP; - } - - if (mca_gpr_proxy_debug) { - ompi_output(0, "[%d,%d,%d] gpr_proxy_delete_object: calling mca_oob_recv_packed", - ompi_rte_get_self()->cellid, ompi_rte_get_self()->jobid, ompi_rte_get_self()->vpid); - } - - if (0 > mca_oob_recv_packed(mca_gpr_my_replica, &answer, &recv_tag)) { - goto CLEANUP; - } - - if (mca_gpr_proxy_debug) { - ompi_output(0, "[%d,%d,%d] gpr_proxy_delete_object: mca_oob_recv_packed returned", - ompi_rte_get_self()->cellid, ompi_rte_get_self()->jobid, ompi_rte_get_self()->vpid); - } - - - if ((OMPI_SUCCESS != ompi_unpack(answer, &command, 1, MCA_GPR_OOB_PACK_CMD)) - || 
(MCA_GPR_DELETE_OBJECT_CMD != command)) { - ompi_buffer_free(answer); - goto CLEANUP; - } - - if (OMPI_SUCCESS != ompi_unpack(answer, &response, 1, OMPI_INT32)) { - ompi_buffer_free(answer); - if (mca_gpr_proxy_debug) { - ompi_output(0, "[%d,%d,%d] gpr_proxy_delete_object: unable to unpack response", - ompi_rte_get_self()->cellid, ompi_rte_get_self()->jobid, ompi_rte_get_self()->vpid); - } - return OMPI_ERROR; - } else { - if (mca_gpr_proxy_debug) { - ompi_output(0, "[%d,%d,%d] gpr_proxy_delete_object: returning with status %d", - ompi_rte_get_self()->cellid, ompi_rte_get_self()->jobid, ompi_rte_get_self()->vpid, response); - } - ompi_buffer_free(answer); - return (int)response; - } - - CLEANUP: - if (mca_gpr_proxy_debug) { - ompi_output(0, "[%d,%d,%d] gpr_proxy_delete_object: cleanup\n", - ompi_rte_get_self()->cellid, ompi_rte_get_self()->jobid, ompi_rte_get_self()->vpid); - } - ompi_buffer_free(cmd); - return OMPI_ERROR; -} - - -ompi_list_t* gpr_proxy_index(char *segment) -{ - ompi_list_t *return_list; - ompi_buffer_t cmd; - ompi_buffer_t answer; - mca_gpr_cmd_flag_t command; - char *string1; - int recv_tag, i; - int32_t num_responses; - ompi_registry_mode_t mode; - ompi_registry_index_value_t *newptr; - - return_list = OBJ_NEW(ompi_list_t); - - command = MCA_GPR_INDEX_CMD; - recv_tag = MCA_OOB_TAG_GPR; - - if (OMPI_SUCCESS != ompi_buffer_init(&cmd, 0)) { /* got a problem */ - return return_list; - } - - if (OMPI_SUCCESS != ompi_pack(cmd, &command, 1, MCA_GPR_OOB_PACK_CMD)) { - goto CLEANUP; - } - - if (NULL == segment) { /* no segment specified - want universe dict */ - mode = 0; - if (OMPI_SUCCESS != ompi_pack(cmd, &mode, 1, MCA_GPR_OOB_PACK_MODE)) { - goto CLEANUP; - } - } else { - mode = 1; - if (OMPI_SUCCESS != ompi_pack(cmd, &mode, 1, MCA_GPR_OOB_PACK_MODE)) { - goto CLEANUP; - } - if (OMPI_SUCCESS != ompi_pack_string(cmd, segment)) { - goto CLEANUP; - } - } - - if (0 > mca_oob_send_packed(mca_gpr_my_replica, cmd, MCA_OOB_TAG_GPR, 0)) { - goto CLEANUP; - } 
- - if (0 > mca_oob_recv_packed(mca_gpr_my_replica, &answer, &recv_tag)) { - goto CLEANUP; - } - - if ((OMPI_SUCCESS != ompi_unpack(answer, &command, 1, MCA_GPR_OOB_PACK_CMD)) - || (MCA_GPR_INDEX_CMD != command)) { - ompi_buffer_free(answer); - goto CLEANUP; - } - - if ((OMPI_SUCCESS != ompi_unpack(answer, &num_responses, 1, OMPI_INT32)) || - (0 >= num_responses)) { - ompi_buffer_free(answer); - goto CLEANUP; - } - - for (i=0; i ompi_unpack_string(answer, &string1)) { - ompi_buffer_free(answer); - goto CLEANUP; - } - newptr = OBJ_NEW(ompi_registry_index_value_t); - newptr->token = strdup(string1); - ompi_list_append(return_list, &newptr->item); - } - - - CLEANUP: - ompi_buffer_free(cmd); - return return_list; -} - - -int gpr_proxy_subscribe(ompi_registry_mode_t mode, - ompi_registry_notify_action_t action, - char *segment, char **tokens, - ompi_registry_notify_cb_fn_t cb_func, void *user_tag) -{ - ompi_buffer_t cmd; - ompi_buffer_t answer; - mca_gpr_cmd_flag_t command; - char **tokptr; - int recv_tag, i; - int32_t num_tokens, response; - mca_gpr_notify_id_t idtag; - - recv_tag = MCA_OOB_TAG_GPR; - - /* need to protect against errors */ - if (NULL == segment) { - return OMPI_ERROR; - } - - command = MCA_GPR_SUBSCRIBE_CMD; - - if (OMPI_SUCCESS != ompi_buffer_init(&cmd, 0)) { /* got a problem */ - return OMPI_ERROR; - } - - if (OMPI_SUCCESS != ompi_pack(cmd, &command, 1, MCA_GPR_OOB_PACK_CMD)) { - goto CLEANUP; - } - - if (OMPI_SUCCESS != ompi_pack(cmd, &mode, 1, MCA_GPR_OOB_PACK_MODE)) { - goto CLEANUP; - } - - if (OMPI_SUCCESS != ompi_pack(cmd, &action, 1, MCA_GPR_OOB_PACK_ACTION)) { - goto CLEANUP; - } - - if (OMPI_SUCCESS != ompi_pack_string(cmd, segment)) { - goto CLEANUP; - } - - num_tokens = 0; - if (NULL != tokens) { - /* compute number of tokens */ - tokptr = tokens; - while (NULL != *tokptr) { - num_tokens++; - tokptr++; - } - } - - if (OMPI_SUCCESS != ompi_pack(cmd, &num_tokens, 1, OMPI_INT32)) { - goto CLEANUP; - } - - if (0 < num_tokens) { - tokptr = 
tokens; - for (i=0; i mca_oob_send_packed(mca_gpr_my_replica, cmd, MCA_OOB_TAG_GPR, 0)) { - goto CLEANUP; - } - - if (0 > mca_oob_recv_packed(mca_gpr_my_replica, &answer, &recv_tag)) { - goto CLEANUP; - } - - if ((OMPI_SUCCESS != ompi_unpack(answer, &command, 1, MCA_GPR_OOB_PACK_CMD)) - || (MCA_GPR_SUBSCRIBE_CMD != command)) { - ompi_buffer_free(answer); - goto CLEANUP; - } - - if ((OMPI_SUCCESS != ompi_unpack(answer, &response, 1, OMPI_INT32)) || - (OMPI_SUCCESS != response)) { - ompi_buffer_free(answer); - goto CLEANUP; - } - - ompi_buffer_free(answer); - ompi_buffer_free(cmd); - return OMPI_SUCCESS; - - CLEANUP: - ompi_buffer_free(cmd); - return OMPI_ERROR; -} - - -int gpr_proxy_unsubscribe(ompi_registry_mode_t mode, - ompi_registry_notify_action_t action, - char *segment, char **tokens) -{ - ompi_buffer_t cmd; - ompi_buffer_t answer; - mca_gpr_cmd_flag_t command; - char **tokptr; - int recv_tag, i; - int32_t num_tokens, response; - - /* need to protect against errors */ - if (NULL == segment) { - return OMPI_ERROR; - } - - command = MCA_GPR_UNSUBSCRIBE_CMD; - - if (OMPI_SUCCESS != ompi_buffer_init(&cmd, 0)) { /* got a problem */ - return OMPI_ERROR; - } - - if (OMPI_SUCCESS != ompi_pack(cmd, &command, 1, MCA_GPR_OOB_PACK_CMD)) { - goto CLEANUP; - } - - if (OMPI_SUCCESS != ompi_pack(cmd, &mode, 1, MCA_GPR_OOB_PACK_MODE)) { - goto CLEANUP; - } - - if (OMPI_SUCCESS != ompi_pack(cmd, &action, 1, MCA_GPR_OOB_PACK_ACTION)) { - goto CLEANUP; - } - - if (OMPI_SUCCESS != ompi_pack_string(cmd, segment)) { - goto CLEANUP; - } - - num_tokens = 0; - if (NULL != tokens) { - /* compute number of tokens */ - tokptr = tokens; - while (NULL != *tokptr) { - num_tokens++; - tokptr++; - } - } - - if (OMPI_SUCCESS != ompi_pack(cmd, &num_tokens, 1, OMPI_INT32)) { - goto CLEANUP; - } - - if (0 < num_tokens) { - tokptr = tokens; - for (i=0; i mca_oob_send_packed(mca_gpr_my_replica, cmd, MCA_OOB_TAG_GPR, 0)) { - goto CLEANUP; - } - - if (0 > mca_oob_recv_packed(mca_gpr_my_replica, 
&answer, &recv_tag)) { - goto CLEANUP; - } - - if ((OMPI_SUCCESS != ompi_unpack(answer, &command, 1, MCA_GPR_OOB_PACK_CMD)) - || (MCA_GPR_UNSUBSCRIBE_CMD != command)) { - ompi_buffer_free(answer); - goto CLEANUP; - } - - if ((OMPI_SUCCESS != ompi_unpack(answer, &response, 1, OMPI_INT32)) || - (OMPI_SUCCESS != response)) { - ompi_buffer_free(answer); - goto CLEANUP; - } - - if (MCA_GPR_NOTIFY_ID_MAX == response) { /* got an error on replica */ - goto CLEANUP; - } - - OMPI_THREAD_LOCK(&mca_gpr_proxy_mutex); - gpr_proxy_remove_notify_request(response); - ompi_buffer_free(answer); - ompi_buffer_free(cmd); - OMPI_THREAD_UNLOCK(&mca_gpr_proxy_mutex); - return OMPI_SUCCESS; - - CLEANUP: - ompi_buffer_free(cmd); - return OMPI_ERROR; -} - -int gpr_proxy_synchro(ompi_registry_synchro_mode_t synchro_mode, - ompi_registry_mode_t mode, - char *segment, char **tokens, int trigger, - ompi_registry_notify_cb_fn_t cb_func, void *user_tag) -{ - ompi_buffer_t cmd; - ompi_buffer_t answer; - mca_gpr_cmd_flag_t command; - char **tokptr; - int recv_tag, i; - int32_t num_tokens, response; - mca_gpr_notify_id_t idtag; - - - /* need to protect against errors */ - if (NULL == segment) { - return OMPI_ERROR; - } - - command = MCA_GPR_SYNCHRO_CMD; - recv_tag = MCA_OOB_TAG_GPR; - - if (OMPI_REGISTRY_SYNCHRO_MODE_NONE == synchro_mode) { /* not allowed */ - return OMPI_ERROR; - } - - if (OMPI_SUCCESS != ompi_buffer_init(&cmd, 0)) { /* got a problem */ - return OMPI_ERROR; - } - - if (OMPI_SUCCESS != ompi_pack(cmd, &command, 1, MCA_GPR_OOB_PACK_CMD)) { - goto CLEANUP; - } - - response = (int32_t)synchro_mode; - if (OMPI_SUCCESS != ompi_pack(cmd, &response, 1, OMPI_INT32)) { - goto CLEANUP; - } - - if (OMPI_SUCCESS != ompi_pack(cmd, &mode, 1, MCA_GPR_OOB_PACK_MODE)) { - goto CLEANUP; - } - - if (OMPI_SUCCESS != ompi_pack_string(cmd, segment)) { - goto CLEANUP; - } - - num_tokens = 0; - if (NULL != tokens) { - /* compute number of tokens */ - tokptr = tokens; - while (NULL != *tokptr) { - 
num_tokens++; - tokptr++; - } - } - - if (OMPI_SUCCESS != ompi_pack(cmd, &num_tokens, 1, OMPI_INT32)) { - goto CLEANUP; - } - - if (0 < num_tokens) { - tokptr = tokens; - for (i=0; i mca_oob_send_packed(mca_gpr_my_replica, cmd, MCA_OOB_TAG_GPR, 0)) { - goto CLEANUP; - } - - if (0 > mca_oob_recv_packed(mca_gpr_my_replica, &answer, &recv_tag)) { - goto CLEANUP; - } - - if ((OMPI_SUCCESS != ompi_unpack(answer, &command, 1, MCA_GPR_OOB_PACK_CMD)) - || (MCA_GPR_SYNCHRO_CMD != command)) { - ompi_buffer_free(answer); - goto CLEANUP; - } - - if ((OMPI_SUCCESS != ompi_unpack(answer, &response, 1, OMPI_INT32)) || - (OMPI_SUCCESS != response)) { - ompi_buffer_free(answer); - goto CLEANUP; - } - - ompi_buffer_free(answer); - ompi_buffer_free(cmd); - return OMPI_SUCCESS; - - CLEANUP: - gpr_proxy_remove_notify_request(idtag); - ompi_buffer_free(cmd); - return OMPI_ERROR; - -} - -int gpr_proxy_cancel_synchro(ompi_registry_synchro_mode_t synchro_mode, - ompi_registry_mode_t addr_mode, - char *segment, char **tokens, int trigger) -{ - ompi_buffer_t cmd; - ompi_buffer_t answer; - mca_gpr_cmd_flag_t command; - char **tokptr; - int recv_tag, i; - int32_t num_tokens, response; - - /* need to protect against errors */ - if (NULL == segment) { - return OMPI_ERROR; - } - - command = MCA_GPR_CANCEL_SYNCHRO_CMD; - - if (OMPI_SUCCESS != ompi_buffer_init(&cmd, 0)) { /* got a problem */ - return OMPI_ERROR; - } - - if (OMPI_SUCCESS != ompi_pack(cmd, &command, 1, MCA_GPR_OOB_PACK_CMD)) { - goto CLEANUP; - } - - response = (int32_t)synchro_mode; - if (OMPI_SUCCESS != ompi_pack(cmd, &response, 1, OMPI_INT32)) { - goto CLEANUP; - } - - if (OMPI_SUCCESS != ompi_pack(cmd, &addr_mode, 1, MCA_GPR_OOB_PACK_MODE)) { - goto CLEANUP; - } - - if (OMPI_SUCCESS != ompi_pack_string(cmd, segment)) { - goto CLEANUP; - } - - num_tokens = 0; - if (NULL != tokens) { - /* compute number of tokens */ - tokptr = tokens; - while (NULL != *tokptr) { - num_tokens++; - tokptr++; - } - } - - if (OMPI_SUCCESS != 
ompi_pack(cmd, &num_tokens, 1, OMPI_INT32)) { - goto CLEANUP; - } - - if (0 < num_tokens) { - tokptr = tokens; - for (i=0; i mca_oob_send_packed(mca_gpr_my_replica, cmd, MCA_OOB_TAG_GPR, 0)) { - goto CLEANUP; - } - - if (0 > mca_oob_recv_packed(mca_gpr_my_replica, &answer, &recv_tag)) { - goto CLEANUP; - } - - if ((OMPI_SUCCESS != ompi_unpack(answer, &command, 1, MCA_GPR_OOB_PACK_CMD)) - || (MCA_GPR_CANCEL_SYNCHRO_CMD != command)) { - ompi_buffer_free(answer); - goto CLEANUP; - } - - if ((OMPI_SUCCESS != ompi_unpack(answer, &response, 1, OMPI_INT32)) || - (OMPI_SUCCESS != response)) { - ompi_buffer_free(answer); - goto CLEANUP; - } - - if (MCA_GPR_NOTIFY_ID_MAX == response) { /* got an error on replica */ - goto CLEANUP; - } - - OMPI_THREAD_LOCK(&mca_gpr_proxy_mutex); - gpr_proxy_remove_notify_request(response); - ompi_buffer_free(answer); - ompi_buffer_free(cmd); - OMPI_THREAD_UNLOCK(&mca_gpr_proxy_mutex); - return OMPI_SUCCESS; - - CLEANUP: - ompi_buffer_free(cmd); - return OMPI_ERROR; -} - - -ompi_list_t* gpr_proxy_get(ompi_registry_mode_t mode, char *segment, char **tokens) -{ - ompi_buffer_t cmd; - ompi_buffer_t answer; - mca_gpr_cmd_flag_t command; - char **tokptr; - int recv_tag, i; - int32_t num_tokens, object_size, num_responses; - ompi_registry_value_t *newptr; - ompi_registry_object_t *object; - ompi_list_t *returned_list; - - returned_list = OBJ_NEW(ompi_list_t); - - /* need to protect against errors */ - if (NULL == segment || NULL == tokens || NULL == *tokens) { - return returned_list; - } - - command = MCA_GPR_GET_CMD; - recv_tag = MCA_OOB_TAG_GPR; - - if (OMPI_SUCCESS != ompi_buffer_init(&cmd, 0)) { /* got a problem */ - return returned_list; - } - - if (OMPI_SUCCESS != ompi_pack(cmd, &command, 1, MCA_GPR_OOB_PACK_CMD)) { - goto CLEANUP; - } - - if (OMPI_SUCCESS != ompi_pack(cmd, &mode, 1, MCA_GPR_OOB_PACK_MODE)) { - goto CLEANUP; - } - - if (OMPI_SUCCESS != ompi_pack_string(cmd, segment)) { - goto CLEANUP; - } - - /* compute number of tokens */ - 
tokptr = tokens; - num_tokens = 0; - while (NULL != *tokptr) { - num_tokens++; - tokptr++; - } - - if (OMPI_SUCCESS != ompi_pack(cmd, &num_tokens, 1, OMPI_INT32)) { - goto CLEANUP; - } - - tokptr = tokens; - for (i=0; i mca_oob_send_packed(mca_gpr_my_replica, cmd, MCA_OOB_TAG_GPR, 0)) { - goto CLEANUP; - } - - - if (0 > mca_oob_recv_packed(mca_gpr_my_replica, &answer, &recv_tag)) { - goto CLEANUP; - } - - if ((OMPI_SUCCESS != ompi_unpack(answer, &command, 1, MCA_GPR_OOB_PACK_CMD)) - || (MCA_GPR_GET_CMD != command)) { - ompi_buffer_free(answer); - goto CLEANUP; - } - - if ((OMPI_SUCCESS != ompi_unpack(answer, &num_responses, 1, OMPI_INT32)) || - (0 >= num_responses)) { - ompi_buffer_free(answer); - goto CLEANUP; - } - - for (i=0; iobject_size = object_size; - newptr->object = object; - ompi_list_append(returned_list, &newptr->item); - } - - CLEANUP: - ompi_buffer_free(cmd); - return returned_list; -} - -int gpr_proxy_rte_register(char *contact_info, size_t num_procs, - ompi_registry_notify_cb_fn_t start_cb_func, void *start_user_tag, - ompi_registry_notify_cb_fn_t end_cb_func, void *end_user_tag) -{ - ompi_buffer_t cmd; - ompi_buffer_t answer; - mca_gpr_cmd_flag_t command; - mca_gpr_notify_id_t local_idtag1, local_idtag2; - int recv_tag; - int32_t response; - - command = MCA_GPR_RTE_REGISTER_CMD; - recv_tag = MCA_OOB_TAG_GPR; - - - if (OMPI_SUCCESS != ompi_buffer_init(&cmd, 0)) { /* got a problem */ - return OMPI_ERROR; - } - - if (OMPI_SUCCESS != ompi_pack(cmd, &command, 1, MCA_GPR_OOB_PACK_CMD)) { - goto CLEANUP; - } - - if (OMPI_SUCCESS != ompi_pack_string(cmd, contact_info)) { - goto CLEANUP; - } - - - if (OMPI_SUCCESS != ompi_pack(cmd, (int32_t*)&num_procs, 1, OMPI_INT32)) { - goto CLEANUP; - } - - - if (OMPI_SUCCESS != ompi_pack(cmd, &ompi_process_info.pid, 1, OMPI_INT32)) { - goto CLEANUP; - } - - - if (OMPI_SUCCESS != ompi_pack_string(cmd, ompi_system_info.nodename)) { - goto CLEANUP; - } - - - OMPI_THREAD_LOCK(&mca_gpr_proxy_mutex); - /* store callback 
functions and user_tags in local list for lookup */ - /* generate id_tags to send to replica to identify lookup entry */ - - local_idtag1 = gpr_proxy_enter_notify_request(start_cb_func, start_user_tag); - - local_idtag2 = gpr_proxy_enter_notify_request(end_cb_func, end_user_tag); - - OMPI_THREAD_UNLOCK(&mca_gpr_proxy_mutex); - - if (OMPI_SUCCESS != ompi_pack(cmd, &local_idtag1, 1, OMPI_INT32)) { - goto CLEANUP; - } - - - if (OMPI_SUCCESS != ompi_pack(cmd, &local_idtag2, 1, OMPI_INT32)) { - goto CLEANUP; - } - - - if (0 > mca_oob_send_packed(mca_gpr_my_replica, cmd, MCA_OOB_TAG_GPR, 0)) { - goto CLEANUP; - } - - - if (0 > mca_oob_recv_packed(mca_gpr_my_replica, &answer, &recv_tag)) { - goto CLEANUP; - } - - if ((OMPI_SUCCESS != ompi_unpack(answer, &command, 1, MCA_GPR_OOB_PACK_CMD)) || - (MCA_GPR_RTE_REGISTER_CMD != command)) { - ompi_buffer_free(answer); - goto CLEANUP; - } - - if (OMPI_SUCCESS != ompi_unpack(answer, &response, 1, OMPI_INT32) || - OMPI_SUCCESS != response) { - ompi_buffer_free(answer); - goto CLEANUP; - } - - /* success */ - ompi_buffer_free(answer); - return OMPI_SUCCESS; - - CLEANUP: - ompi_buffer_free(cmd); - return OMPI_ERROR; -} - - -int gpr_proxy_rte_unregister(char *proc_name_string) -{ - ompi_buffer_t cmd; - ompi_buffer_t answer; - mca_gpr_cmd_flag_t command; - int recv_tag; - int32_t response; - - command = MCA_GPR_RTE_UNREGISTER_CMD; - recv_tag = MCA_OOB_TAG_GPR; - - if (OMPI_SUCCESS != ompi_buffer_init(&cmd, 0)) { /* got a problem */ - return OMPI_ERROR; - } - - if (OMPI_SUCCESS != ompi_pack(cmd, &command, 1, MCA_GPR_OOB_PACK_CMD)) { - goto CLEANUP; - } - - if (OMPI_SUCCESS != ompi_pack_string(cmd, proc_name_string)) { - goto CLEANUP; - } - - if (0 > mca_oob_send_packed(mca_gpr_my_replica, cmd, MCA_OOB_TAG_GPR, 0)) { - goto CLEANUP; - } - - - if (0 > mca_oob_recv_packed(mca_gpr_my_replica, &answer, &recv_tag)) { - goto CLEANUP; - } - - if ((OMPI_SUCCESS != ompi_unpack(answer, &command, 1, MCA_GPR_OOB_PACK_CMD)) - || 
(MCA_GPR_RTE_UNREGISTER_CMD != command)) { - ompi_buffer_free(answer); - goto CLEANUP; - } - - if (OMPI_SUCCESS != ompi_unpack(answer, &response, 1, OMPI_INT32) || - OMPI_SUCCESS != response) { - ompi_buffer_free(answer); - goto CLEANUP; - } - - /* success */ - ompi_buffer_free(answer); - return OMPI_SUCCESS; - - CLEANUP: - ompi_buffer_free(cmd); - return OMPI_ERROR; - -} - - -mca_gpr_notify_id_t gpr_proxy_enter_notify_request(ompi_registry_notify_cb_fn_t cb_func, - void *user_tag) -{ - mca_gpr_notify_request_tracker_t *trackptr; - mca_gpr_idtag_list_t *ptr_free_id; - - trackptr = OBJ_NEW(mca_gpr_notify_request_tracker_t); - trackptr->requestor = NULL; - trackptr->req_tag = 0; - trackptr->callback = cb_func; - trackptr->user_tag = user_tag; - if (ompi_list_is_empty(&mca_gpr_proxy_free_notify_id_tags)) { - trackptr->id_tag = mca_gpr_proxy_last_notify_id_tag; - mca_gpr_proxy_last_notify_id_tag++; - } else { - ptr_free_id = (mca_gpr_idtag_list_t*)ompi_list_remove_first(&mca_gpr_proxy_free_notify_id_tags); - trackptr->id_tag = ptr_free_id->id_tag; - } - ompi_list_append(&mca_gpr_proxy_notify_request_tracker, &trackptr->item); - - return trackptr->id_tag; -} - - -int gpr_proxy_remove_notify_request(mca_gpr_notify_id_t local_idtag) -{ - mca_gpr_notify_request_tracker_t *trackptr; - mca_gpr_idtag_list_t *ptr_free_id; - - /* locate corresponding entry on proxy tracker list and remove it */ - for (trackptr = (mca_gpr_notify_request_tracker_t*)ompi_list_get_first(&mca_gpr_proxy_notify_request_tracker); - trackptr != (mca_gpr_notify_request_tracker_t*)ompi_list_get_end(&mca_gpr_proxy_notify_request_tracker) && - trackptr->id_tag != local_idtag; - trackptr = (mca_gpr_notify_request_tracker_t*)ompi_list_get_next(trackptr)); - - if (trackptr == (mca_gpr_notify_request_tracker_t*)ompi_list_get_end(&mca_gpr_proxy_notify_request_tracker)) { - return OMPI_ERROR; - } - ompi_list_remove_item(&mca_gpr_proxy_notify_request_tracker, &trackptr->item); - /* put id tag on free list */ - 
ptr_free_id = OBJ_NEW(mca_gpr_idtag_list_t); - ptr_free_id->id_tag = trackptr->id_tag; - ompi_list_append(&mca_gpr_proxy_free_notify_id_tags, &ptr_free_id->item); - /* release tracker item */ - OBJ_RELEASE(trackptr); - - return OMPI_SUCCESS; -} - - -ompi_list_t* gpr_proxy_test_internals(int level) -{ - ompi_list_t *test_results=NULL; - ompi_buffer_t cmd, answer; - char **string1=NULL, **string2=NULL; - int i; - int32_t num_responses, test_level; - ompi_registry_internal_test_results_t *newptr=NULL; - mca_gpr_cmd_flag_t command; - int recv_tag; - - - test_results = OBJ_NEW(ompi_list_t); - test_level = (int32_t)level; - - command = MCA_GPR_TEST_INTERNALS_CMD; - recv_tag = MCA_OOB_TAG_GPR; - - if (OMPI_SUCCESS != ompi_buffer_init(&cmd, 0)) { /* got a problem */ - return test_results; - } - - if (OMPI_SUCCESS != ompi_pack(cmd, &command, 1, MCA_GPR_OOB_PACK_CMD)) { - goto CLEANUP; - } - - if (OMPI_SUCCESS != ompi_pack(cmd, &test_level, 1, OMPI_INT32)) { - goto CLEANUP; - } - - if (0 > mca_oob_send_packed(mca_gpr_my_replica, cmd, MCA_OOB_TAG_GPR, 0)) { - goto CLEANUP; - } - - - if (0 > mca_oob_recv_packed(mca_gpr_my_replica, &answer, &recv_tag)) { - goto CLEANUP; - } - - if ((OMPI_SUCCESS != ompi_unpack(answer, &command, 1, MCA_GPR_OOB_PACK_CMD)) - || (MCA_GPR_TEST_INTERNALS_CMD != command)) { - ompi_buffer_free(answer); - goto CLEANUP; - } - - if ((OMPI_SUCCESS != ompi_unpack(answer, &num_responses, 1, OMPI_INT32)) || - (0 >= num_responses)) { - ompi_buffer_free(answer); - goto CLEANUP; - } - - for (i=0; i ompi_unpack_string(answer, string1)) { - ompi_buffer_free(answer); - goto CLEANUP; - } - if (0 > ompi_unpack_string(answer, string2)) { - ompi_buffer_free(answer); - goto CLEANUP; - } - newptr = OBJ_NEW(ompi_registry_internal_test_results_t); - newptr->test = strdup(*string1); - newptr->message = strdup(*string2); - ompi_list_append(test_results, &newptr->item); - } - ompi_buffer_free(answer); - - CLEANUP: - ompi_buffer_free(cmd); - return test_results; -} diff --git 
a/src/mca/gpr/proxy/gpr_proxy.h b/src/mca/gpr/proxy/gpr_proxy.h index a6f16d251c..3fcd4cc492 100644 --- a/src/mca/gpr/proxy/gpr_proxy.h +++ b/src/mca/gpr/proxy/gpr_proxy.h @@ -8,12 +8,7 @@ #include "ompi_config.h" -#include "include/types.h" -#include "include/constants.h" -#include "threads/mutex.h" - -#include "class/ompi_list.h" #include "mca/gpr/base/base.h" /* @@ -26,13 +21,28 @@ int mca_gpr_proxy_close(void); /* * Startup / Shutdown */ -mca_gpr_base_module_t* mca_gpr_proxy_init(bool *allow_multi_user_threads, bool *have_hidden_threads, int *priority); +mca_gpr_base_module_t* +mca_gpr_proxy_init(bool *allow_multi_user_threads, bool *have_hidden_threads, int *priority); + int mca_gpr_proxy_finalize(void); /* * proxy-local types */ +struct mca_gpr_proxy_notify_request_tracker_t { + ompi_list_item_t item; /**< Allows this item to be placed on a list */ + ompi_registry_notify_cb_fn_t callback; /**< Function to be called for notification */ + void *user_tag; /**< User-provided tag for callback function */ + ompi_registry_notify_id_t local_idtag; /**< Local ID tag of associated subscription */ + ompi_registry_notify_id_t remote_idtag; /**< Remote ID tag of subscription */ + char *segment; /**< Pointer to name of segment */ + ompi_registry_notify_action_t action; /**< Action that triggers notification */ +}; +typedef struct mca_gpr_proxy_notify_request_tracker_t mca_gpr_proxy_notify_request_tracker_t; + +OMPI_DECLSPEC OBJ_CLASS_DECLARATION(mca_gpr_proxy_notify_request_tracker_t); + /* * globals used within proxy component @@ -40,79 +50,150 @@ int mca_gpr_proxy_finalize(void); extern ompi_process_name_t *mca_gpr_my_replica; extern ompi_list_t mca_gpr_proxy_notify_request_tracker; -extern mca_gpr_notify_id_t mca_gpr_proxy_last_notify_id_tag; +extern ompi_registry_notify_id_t mca_gpr_proxy_last_notify_id_tag; extern ompi_list_t mca_gpr_proxy_free_notify_id_tags; extern int mca_gpr_proxy_debug; extern ompi_mutex_t mca_gpr_proxy_mutex; +extern bool
mca_gpr_proxy_compound_cmd_mode; +extern ompi_buffer_t mca_gpr_proxy_compound_cmd; +extern ompi_mutex_t mca_gpr_proxy_wait_for_compound_mutex; +extern ompi_condition_t mca_gpr_proxy_compound_cmd_condition; +extern int mca_gpr_proxy_compound_cmd_waiting; +extern bool mca_gpr_proxy_silent_mode; /* - * Implementation of delete_segment(). + * Compound cmd functions */ - int gpr_proxy_delete_segment(char *segment); +int mca_gpr_proxy_begin_compound_cmd(void); + +int mca_gpr_proxy_stop_compound_cmd(void); + +ompi_list_t* mca_gpr_proxy_exec_compound_cmd(bool return_requested); /* - * Implementation of put() + * Mode operations */ -int gpr_proxy_put(ompi_registry_mode_t mode, char *segment, +void mca_gpr_proxy_silent_mode_on(void); + +void mca_gpr_proxy_silent_mode_off(void); + +void mca_gpr_proxy_notify_off(ompi_registry_notify_id_t sub_number); + +void mca_gpr_proxy_notify_on(ompi_registry_notify_id_t sub_number); + +void mca_gpr_proxy_triggers_active(mca_ns_base_jobid_t jobid); + +void mca_gpr_proxy_triggers_inactive(mca_ns_base_jobid_t jobid); + +int mca_gpr_proxy_assume_ownership(char *segment); + +/* + * Delete-index functions + */ +int mca_gpr_proxy_delete_segment(char *segment); + +int mca_gpr_proxy_delete_object(ompi_registry_mode_t mode, + char *segment, char **tokens); + +ompi_list_t* mca_gpr_proxy_index(char *segment); + +void mca_gpr_proxy_cleanup(mca_ns_base_jobid_t jobid); + + +/* + * Cleanup functions + */ +void mca_gpr_proxy_cleanup_job(mca_ns_base_jobid_t jobid); + +void mca_gpr_proxy_cleanup_proc(bool purge, ompi_process_name_t *proc); + + +/* + * Put-get functions + */ +int mca_gpr_proxy_put(ompi_registry_mode_t mode, char *segment, char **tokens, ompi_registry_object_t object, ompi_registry_object_size_t size); -/* - * Implementation of delete() - */ -int gpr_proxy_delete_object(ompi_registry_mode_t mode, - char *segment, char **tokens); +ompi_list_t* mca_gpr_proxy_get(ompi_registry_mode_t mode, + char *segment, char **tokens); -/* - * Implementation 
of index() - */ -ompi_list_t* gpr_proxy_index(char *segment); /* - * Implementation of subscribe() + * Subscribe functions */ -int gpr_proxy_subscribe(ompi_registry_mode_t mode, +ompi_registry_notify_id_t +mca_gpr_proxy_subscribe(ompi_registry_mode_t mode, ompi_registry_notify_action_t action, char *segment, char **tokens, ompi_registry_notify_cb_fn_t cb_func, void *user_tag); -int gpr_proxy_unsubscribe(ompi_registry_mode_t mode, - ompi_registry_notify_action_t action, - char *segment, char **tokens); +int mca_gpr_proxy_unsubscribe(ompi_registry_notify_id_t sub_number); -int gpr_proxy_synchro(ompi_registry_synchro_mode_t synchro_mode, + +/* + * Synchro functions + */ +ompi_registry_notify_id_t +mca_gpr_proxy_synchro(ompi_registry_synchro_mode_t synchro_mode, ompi_registry_mode_t mode, char *segment, char **tokens, int trigger, ompi_registry_notify_cb_fn_t cb_func, void *user_tag); -int gpr_proxy_cancel_synchro(ompi_registry_synchro_mode_t synchro_mode, - ompi_registry_mode_t addr_mode, - char *segment, char **tokens, int trigger); +int mca_gpr_proxy_cancel_synchro(ompi_registry_notify_id_t synch_number); + + /* - * Implementation of get() + * Dump function */ -ompi_list_t* gpr_proxy_get(ompi_registry_mode_t mode, - char *segment, char **tokens); +void mca_gpr_proxy_dump(int output_id); -ompi_list_t* gpr_proxy_test_internals(int level); +/* + * Messaging functions + */ +void mca_gpr_proxy_deliver_notify_msg(ompi_registry_notify_action_t state, + ompi_registry_notify_message_t *message); + +/* + * Test internals + */ +ompi_list_t* mca_gpr_proxy_test_internals(int level); + + +/* + * Startup/shutdown functions + */ +ompi_buffer_t mca_gpr_proxy_get_startup_msg(mca_ns_base_jobid_t jobid, + ompi_list_t *recipients); + +ompi_buffer_t mca_gpr_proxy_get_shutdown_msg(mca_ns_base_jobid_t jobid, + ompi_list_t *recipients); + + +/* + * Functions that interface to the replica + */ void mca_gpr_proxy_notify_recv(int status, ompi_process_name_t* sender, ompi_buffer_t buffer, int 
tag, void* cbdata); -int gpr_proxy_rte_register(char *contact_info, size_t num_procs, - ompi_registry_notify_cb_fn_t start_cb_func, void *start_user_tag, - ompi_registry_notify_cb_fn_t end_cb_func, void *end_user_tag); -int gpr_proxy_rte_unregister(char *proc_name_string); +/* + * Internal functions + */ -/* internal functions */ +ompi_registry_notify_id_t +mca_gpr_proxy_enter_notify_request(char *segment, ompi_registry_notify_action_t action, + ompi_registry_notify_cb_fn_t cb_func, + void *user_tag); -mca_gpr_notify_id_t gpr_proxy_enter_notify_request(ompi_registry_notify_cb_fn_t cb_func, - void *user_tag); +ompi_registry_notify_id_t +mca_gpr_proxy_remove_notify_request(ompi_registry_notify_id_t local_idtag); -int gpr_proxy_remove_notify_request(mca_gpr_notify_id_t local_idtag); +int mca_gpr_proxy_set_remote_idtag(ompi_registry_notify_id_t local_idtag, + ompi_registry_notify_id_t remote_idtag); #endif diff --git a/src/mca/gpr/proxy/gpr_proxy_cleanup.c b/src/mca/gpr/proxy/gpr_proxy_cleanup.c new file mode 100644 index 0000000000..a02a97729b --- /dev/null +++ b/src/mca/gpr/proxy/gpr_proxy_cleanup.c @@ -0,0 +1,66 @@ +/* + * $HEADER$ + */ +/** @file: + * + * The Open MPI general purpose registry - implementation. 
+ * + */ + +/* + * includes + */ + +#include "ompi_config.h" + +#include "gpr_proxy.h" + + +void mca_gpr_proxy_cleanup_job(mca_ns_base_jobid_t jobid) +{ + ompi_buffer_t cmd; + + if (mca_gpr_proxy_compound_cmd_mode) { + mca_gpr_base_pack_cleanup_job(mca_gpr_proxy_compound_cmd, jobid); + return; + } + + + if (OMPI_SUCCESS != ompi_buffer_init(&cmd, 0)) { /* got a problem */ + return; + } + + if (OMPI_SUCCESS != mca_gpr_base_pack_cleanup_job(cmd, jobid)) { + return; + } + + mca_oob_send_packed(mca_gpr_my_replica, cmd, MCA_OOB_TAG_GPR, 0); + + ompi_buffer_free(cmd); + +} + + +void mca_gpr_proxy_cleanup_proc(bool purge, ompi_process_name_t *proc) +{ + ompi_buffer_t cmd; + + if (mca_gpr_proxy_compound_cmd_mode) { + mca_gpr_base_pack_cleanup_proc(mca_gpr_proxy_compound_cmd, purge, proc); + return; + } + + + if (OMPI_SUCCESS != ompi_buffer_init(&cmd, 0)) { /* got a problem */ + return; + } + + if (OMPI_SUCCESS != mca_gpr_base_pack_cleanup_proc(cmd, purge, proc)) { + return; + } + + mca_oob_send_packed(mca_gpr_my_replica, cmd, MCA_OOB_TAG_GPR, 0); + + ompi_buffer_free(cmd); + +} diff --git a/src/mca/gpr/proxy/gpr_proxy_component.c b/src/mca/gpr/proxy/gpr_proxy_component.c index 37e02a5287..e20215aad6 100644 --- a/src/mca/gpr/proxy/gpr_proxy_component.c +++ b/src/mca/gpr/proxy/gpr_proxy_component.c @@ -13,16 +13,8 @@ */ #include "ompi_config.h" -#include "include/constants.h" - -#include "threads/mutex.h" - -#include "util/proc_info.h" -#include "util/output.h" -#include "mca/mca.h" -#include "mca/base/mca_base_param.h" -#include "mca/ns/base/base.h" #include "mca/gpr/base/base.h" + #include "gpr_proxy.h" @@ -51,18 +43,32 @@ OMPI_COMP_EXPORT mca_gpr_base_component_t mca_gpr_proxy_component = { * setup the function pointers for the module */ static mca_gpr_base_module_t mca_gpr_proxy = { - gpr_proxy_get, - gpr_proxy_put, - gpr_proxy_delete_segment, - gpr_proxy_subscribe, - gpr_proxy_unsubscribe, - gpr_proxy_synchro, - gpr_proxy_cancel_synchro, - gpr_proxy_delete_object, - 
gpr_proxy_index, - gpr_proxy_test_internals, - gpr_proxy_rte_register, - gpr_proxy_rte_unregister + mca_gpr_proxy_get, + mca_gpr_proxy_put, + mca_gpr_proxy_delete_segment, + mca_gpr_proxy_subscribe, + mca_gpr_proxy_unsubscribe, + mca_gpr_proxy_synchro, + mca_gpr_proxy_cancel_synchro, + mca_gpr_proxy_delete_object, + mca_gpr_proxy_index, + mca_gpr_proxy_test_internals, + mca_gpr_proxy_begin_compound_cmd, + mca_gpr_proxy_stop_compound_cmd, + mca_gpr_proxy_exec_compound_cmd, + mca_gpr_proxy_dump, + mca_gpr_proxy_silent_mode_on, + mca_gpr_proxy_silent_mode_off, + mca_gpr_proxy_notify_off, + mca_gpr_proxy_notify_on, + mca_gpr_proxy_assume_ownership, + mca_gpr_proxy_triggers_active, + mca_gpr_proxy_triggers_inactive, + mca_gpr_proxy_get_startup_msg, + mca_gpr_proxy_get_shutdown_msg, + mca_gpr_proxy_cleanup_job, + mca_gpr_proxy_cleanup_proc, + mca_gpr_proxy_deliver_notify_msg }; @@ -76,15 +82,47 @@ static bool initialized = false; */ ompi_process_name_t *mca_gpr_my_replica; ompi_list_t mca_gpr_proxy_notify_request_tracker; -mca_gpr_notify_id_t mca_gpr_proxy_last_notify_id_tag; +ompi_registry_notify_id_t mca_gpr_proxy_last_notify_id_tag; ompi_list_t mca_gpr_proxy_free_notify_id_tags; int mca_gpr_proxy_debug; ompi_mutex_t mca_gpr_proxy_mutex; +bool mca_gpr_proxy_compound_cmd_mode; +ompi_buffer_t mca_gpr_proxy_compound_cmd; +ompi_mutex_t mca_gpr_proxy_wait_for_compound_mutex; +ompi_condition_t mca_gpr_proxy_compound_cmd_condition; +int mca_gpr_proxy_compound_cmd_waiting; +bool mca_gpr_proxy_silent_mode; + + +/* constructor - used to initialize notify request tracker instance */ +static void mca_gpr_proxy_notify_request_tracker_construct(mca_gpr_proxy_notify_request_tracker_t* req) +{ + req->callback = NULL; + req->user_tag = NULL; + req->local_idtag = OMPI_REGISTRY_NOTIFY_ID_MAX; + req->remote_idtag = OMPI_REGISTRY_NOTIFY_ID_MAX; + req->segment = NULL; + req->action = OMPI_REGISTRY_NOTIFY_NONE; +} + +/* destructor - used to free any resources held by instance */ +static void
mca_gpr_proxy_notify_request_tracker_destructor(mca_gpr_proxy_notify_request_tracker_t* req) +{ + if (NULL != req->segment) { + free(req->segment); + } +} + +/* define instance of ompi_class_t */ +OBJ_CLASS_INSTANCE( + mca_gpr_proxy_notify_request_tracker_t, /* type name */ + ompi_list_item_t, /* parent "class" name */ + mca_gpr_proxy_notify_request_tracker_construct, /* constructor */ + mca_gpr_proxy_notify_request_tracker_destructor); /* destructor */ /* - * don't really need this function - could just put NULL in the above structure - * Just holding the place in case we decide there is something we need to do + * Open the component */ int mca_gpr_proxy_open(void) { @@ -97,7 +135,7 @@ int mca_gpr_proxy_open(void) } /* - * ditto for this one + * Close the component */ int mca_gpr_proxy_close(void) { @@ -132,8 +170,14 @@ mca_gpr_base_module_t* mca_gpr_proxy_init(bool *allow_multi_user_threads, bool * *allow_multi_user_threads = true; *have_hidden_threads = false; - /* setup thread lock */ + /* setup thread locks and condition variable */ OBJ_CONSTRUCT(&mca_gpr_proxy_mutex, ompi_mutex_t); + OBJ_CONSTRUCT(&mca_gpr_proxy_wait_for_compound_mutex, ompi_mutex_t); + OBJ_CONSTRUCT(&mca_gpr_proxy_compound_cmd_condition, ompi_condition_t); + + /* initialize the registry compound mode */ + mca_gpr_proxy_compound_cmd_mode = false; + mca_gpr_proxy_compound_cmd_waiting = 0; /* define the replica for us to use - get it from process_info */ mca_gpr_my_replica = ompi_name_server.copy_process_name(ompi_process_info.gpr_replica); @@ -141,11 +185,14 @@ mca_gpr_base_module_t* mca_gpr_proxy_init(bool *allow_multi_user_threads, bool * return NULL; } - /* initialize the notify list */ + /* initialize the notify request tracker */ OBJ_CONSTRUCT(&mca_gpr_proxy_notify_request_tracker, ompi_list_t); mca_gpr_proxy_last_notify_id_tag = 0; OBJ_CONSTRUCT(&mca_gpr_proxy_free_notify_id_tags, ompi_list_t); + /* initialize any local variables */ + mca_gpr_proxy_silent_mode = false; + /* issue the 
non-blocking receive */ rc = mca_oob_recv_packed_nb(MCA_OOB_NAME_ANY, MCA_OOB_TAG_GPR_NOTIFY, 0, mca_gpr_proxy_notify_recv, NULL); if(rc != OMPI_SUCCESS && rc != OMPI_ERR_NOT_IMPLEMENTED) { @@ -190,11 +237,12 @@ void mca_gpr_proxy_notify_recv(int status, ompi_process_name_t* sender, { char **tokptr; mca_gpr_cmd_flag_t command; - int32_t num_items, i, id_tag; + uint32_t num_items; + uint32_t i, id_tag; ompi_registry_value_t *regval; ompi_registry_notify_message_t *message; bool found; - mca_gpr_notify_request_tracker_t *trackptr; + mca_gpr_proxy_notify_request_tracker_t *trackptr; if (mca_gpr_proxy_debug) { ompi_output(0, "gpr proxy: received trigger message"); @@ -207,6 +255,15 @@ void mca_gpr_proxy_notify_recv(int status, ompi_process_name_t* sender, goto RETURN_ERROR; } + if (0 > ompi_unpack_string(buffer, &message->segment)) { + goto RETURN_ERROR; + } + + if (OMPI_SUCCESS != ompi_unpack(buffer, &i, 1, OMPI_INT32)) { + goto RETURN_ERROR; + } + message->owning_job = (mca_ns_base_jobid_t)i; + if (OMPI_SUCCESS != ompi_unpack(buffer, &id_tag, 1, OMPI_INT32)) { goto RETURN_ERROR; } @@ -259,26 +316,24 @@ void mca_gpr_proxy_notify_recv(int status, ompi_process_name_t* sender, /* find the request corresponding to this notify */ found = false; - for (trackptr = (mca_gpr_notify_request_tracker_t*)ompi_list_get_first(&mca_gpr_proxy_notify_request_tracker); - trackptr != (mca_gpr_notify_request_tracker_t*)ompi_list_get_end(&mca_gpr_proxy_notify_request_tracker); - trackptr = (mca_gpr_notify_request_tracker_t*)ompi_list_get_next(trackptr)) { - if (trackptr->id_tag == id_tag) { + for (trackptr = (mca_gpr_proxy_notify_request_tracker_t*)ompi_list_get_first(&mca_gpr_proxy_notify_request_tracker); + trackptr != (mca_gpr_proxy_notify_request_tracker_t*)ompi_list_get_end(&mca_gpr_proxy_notify_request_tracker) && !found; + trackptr = (mca_gpr_proxy_notify_request_tracker_t*)ompi_list_get_next(trackptr)) { + if (trackptr->local_idtag == id_tag) { found = true; - break; } } + 
OMPI_THREAD_UNLOCK(&mca_gpr_proxy_mutex); + if (!found) { /* didn't find request */ ompi_output(0, "Proxy notification error - received request not found"); - OMPI_THREAD_UNLOCK(&mca_gpr_proxy_mutex); return; } /* process request */ trackptr->callback(message, trackptr->user_tag); - OMPI_THREAD_UNLOCK(&mca_gpr_proxy_mutex); - /* dismantle message and free memory */ RETURN_ERROR: diff --git a/src/mca/gpr/proxy/gpr_proxy_compound_cmd.c b/src/mca/gpr/proxy/gpr_proxy_compound_cmd.c new file mode 100644 index 0000000000..b83383ba08 --- /dev/null +++ b/src/mca/gpr/proxy/gpr_proxy_compound_cmd.c @@ -0,0 +1,117 @@ +/* + * $HEADER$ + */ +/** @file: + * + * The Open MPI general purpose registry - implementation. + * + */ + +/* + * includes + */ + +#include "ompi_config.h" + +#include "gpr_proxy.h" + + +int mca_gpr_proxy_begin_compound_cmd(void) +{ + size_t size; + + OMPI_THREAD_LOCK(&mca_gpr_proxy_wait_for_compound_mutex); + + if (mca_gpr_proxy_compound_cmd_mode) { + mca_gpr_proxy_compound_cmd_waiting++; + ompi_condition_wait(&mca_gpr_proxy_compound_cmd_condition, &mca_gpr_proxy_wait_for_compound_mutex); + mca_gpr_proxy_compound_cmd_waiting--; + } + + mca_gpr_proxy_compound_cmd_mode = true; + ompi_buffer_size(mca_gpr_proxy_compound_cmd, &size); + if (0 < size) { + ompi_buffer_free(mca_gpr_proxy_compound_cmd); + } + ompi_buffer_init(&mca_gpr_proxy_compound_cmd, 0); + + OMPI_THREAD_UNLOCK(&mca_gpr_proxy_wait_for_compound_mutex); + return OMPI_SUCCESS; +} + + +int mca_gpr_proxy_stop_compound_cmd(void) +{ + size_t size; + + OMPI_THREAD_LOCK(&mca_gpr_proxy_wait_for_compound_mutex); + + mca_gpr_proxy_compound_cmd_mode = false; + ompi_buffer_size(mca_gpr_proxy_compound_cmd, &size); + if (0 < size) { + ompi_buffer_free(mca_gpr_proxy_compound_cmd); + } + + if (mca_gpr_proxy_compound_cmd_waiting) { + ompi_condition_signal(&mca_gpr_proxy_compound_cmd_condition); + } + + OMPI_THREAD_UNLOCK(&mca_gpr_proxy_wait_for_compound_mutex); + return OMPI_SUCCESS; +} + + +ompi_list_t* 
mca_gpr_proxy_exec_compound_cmd(bool return_requested) +{ + uint8_t tmp; + mca_gpr_cmd_flag_t command; + ompi_buffer_t answer; + int recv_tag=MCA_OOB_TAG_GPR; + size_t size; + + if (mca_gpr_proxy_debug) { + ompi_output(0, "[%d,%d,%d] transmitting compound command", + OMPI_NAME_ARGS(*ompi_rte_get_self())); + } + + OMPI_THREAD_LOCK(&mca_gpr_proxy_wait_for_compound_mutex); + + /** + * pack the exec_compound_cmd command and return_requested flag at the end of the buffer + * then send command off to be processed + */ + + command = MCA_GPR_COMPOUND_CMD; + ompi_pack(mca_gpr_proxy_compound_cmd, &command, 1, MCA_GPR_OOB_PACK_CMD); + + tmp = (uint8_t)return_requested; + ompi_pack(mca_gpr_proxy_compound_cmd, &tmp, 1, MCA_GPR_OOB_PACK_BOOL); + + if (0 > mca_oob_send_packed(mca_gpr_my_replica, mca_gpr_proxy_compound_cmd, MCA_OOB_TAG_GPR, 0)) { + goto CLEANUP; + } + + if (return_requested) { + if (0 > mca_oob_recv_packed(mca_gpr_my_replica, &answer, &recv_tag)) { + goto CLEANUP; + } + + /* RHC = need to figure out how to unpack the return message */ + } + + CLEANUP: + mca_gpr_proxy_compound_cmd_mode = false; + ompi_buffer_size(mca_gpr_proxy_compound_cmd, &size); + if (0 < size) { + ompi_buffer_free(mca_gpr_proxy_compound_cmd); + } + if (mca_gpr_proxy_compound_cmd_waiting) { + ompi_condition_signal(&mca_gpr_proxy_compound_cmd_condition); + } + + OMPI_THREAD_UNLOCK(&mca_gpr_proxy_wait_for_compound_mutex); + + return OMPI_SUCCESS; +} + + diff --git a/src/mca/gpr/proxy/gpr_proxy_del_index.c b/src/mca/gpr/proxy/gpr_proxy_del_index.c new file mode 100644 index 0000000000..4999dc361c --- /dev/null +++ b/src/mca/gpr/proxy/gpr_proxy_del_index.c @@ -0,0 +1,161 @@ +/* + * $HEADER$ + */ +/** @file: + * + */ + +#include "ompi_config.h" + +#include "gpr_proxy.h" + +/** + * globals + */ + +/* + * Implemented registry functions + */ + + +int mca_gpr_proxy_delete_segment(char *segment) +{ + ompi_buffer_t cmd; + ompi_buffer_t answer; + int recv_tag=MCA_OOB_TAG_GPR, response; + + if 
(mca_gpr_proxy_compound_cmd_mode) { + return mca_gpr_base_pack_delete_segment(mca_gpr_proxy_compound_cmd, + mca_gpr_proxy_silent_mode, segment); + } + + if (OMPI_SUCCESS != ompi_buffer_init(&cmd, 0)) { /* got a problem */ + return OMPI_ERROR; + } + + response = OMPI_ERROR; + + if (OMPI_SUCCESS != mca_gpr_base_pack_delete_segment(cmd, mca_gpr_proxy_silent_mode, + segment)) { + goto CLEANUP; + } + + if (0 > mca_oob_send_packed(mca_gpr_my_replica, cmd, MCA_OOB_TAG_GPR, 0)) { + goto CLEANUP; + } + + if (mca_gpr_proxy_silent_mode) { + ompi_buffer_free(cmd); + return OMPI_SUCCESS; + } + + if (0 > mca_oob_recv_packed(mca_gpr_my_replica, &answer, &recv_tag)) { + goto CLEANUP; + } + + response = mca_gpr_base_unpack_delete_segment(answer); + ompi_buffer_free(answer); + + CLEANUP: + ompi_buffer_free(cmd); + return response; +} + + +int mca_gpr_proxy_delete_object(ompi_registry_mode_t mode, + char *segment, char **tokens) +{ + ompi_buffer_t cmd; + ompi_buffer_t answer; + int recv_tag=MCA_OOB_TAG_GPR; + int response; + + if (mca_gpr_proxy_debug) { + ompi_output(0, "[%d,%d,%d] gpr_proxy_delete_object", OMPI_NAME_ARGS(*ompi_rte_get_self())); + } + + /* need to protect against errors */ + if (NULL == segment || NULL == tokens || NULL == *tokens) { + return OMPI_ERROR; + } + + if (mca_gpr_proxy_compound_cmd_mode) { + return mca_gpr_base_pack_delete_object(mca_gpr_proxy_compound_cmd, + mca_gpr_proxy_silent_mode, + mode, segment, tokens); + } + + if (OMPI_SUCCESS != ompi_buffer_init(&cmd, 0)) { /* got a problem */ + return OMPI_ERROR; + } + + response = OMPI_ERROR; + + if (OMPI_SUCCESS != mca_gpr_base_pack_delete_object(cmd, mca_gpr_proxy_silent_mode, + mode, segment, tokens)) { + goto CLEANUP; + } + + if (0 > mca_oob_send_packed(mca_gpr_my_replica, cmd, MCA_OOB_TAG_GPR, 0)) { + goto CLEANUP; + } + + if (mca_gpr_proxy_silent_mode) { + ompi_buffer_free(cmd); + return OMPI_SUCCESS; + } + + if (0 > mca_oob_recv_packed(mca_gpr_my_replica, &answer, &recv_tag)) { + goto CLEANUP; + } + + 
response = mca_gpr_base_unpack_delete_object(answer); + ompi_buffer_free(answer); + + + CLEANUP: + if (mca_gpr_proxy_debug) { + ompi_output(0, "[%d,%d,%d] gpr_proxy_delete_object: cleanup\n", OMPI_NAME_ARGS(*ompi_rte_get_self())); + } + ompi_buffer_free(cmd); + return response; +} + + +ompi_list_t* mca_gpr_proxy_index(char *segment) +{ + ompi_list_t *return_list; + ompi_buffer_t cmd; + ompi_buffer_t answer; + int recv_tag=MCA_OOB_TAG_GPR; + + return_list = OBJ_NEW(ompi_list_t); + + if (mca_gpr_proxy_compound_cmd_mode) { + mca_gpr_base_pack_index(mca_gpr_proxy_compound_cmd, segment); + return return_list; + } + + if (OMPI_SUCCESS != ompi_buffer_init(&cmd, 0)) { /* got a problem */ + return return_list; + } + + if (OMPI_SUCCESS != mca_gpr_base_pack_index(cmd, segment)) { + goto CLEANUP; + } + + if (0 > mca_oob_send_packed(mca_gpr_my_replica, cmd, MCA_OOB_TAG_GPR, 0)) { + goto CLEANUP; + } + + if (0 > mca_oob_recv_packed(mca_gpr_my_replica, &answer, &recv_tag)) { + goto CLEANUP; + } + + mca_gpr_base_unpack_index(answer, return_list); + ompi_buffer_free(answer); + + CLEANUP: + ompi_buffer_free(cmd); + return return_list; +} diff --git a/src/mca/gpr/proxy/gpr_proxy_dump.c b/src/mca/gpr/proxy/gpr_proxy_dump.c new file mode 100644 index 0000000000..0c4bfe6324 --- /dev/null +++ b/src/mca/gpr/proxy/gpr_proxy_dump.c @@ -0,0 +1,71 @@ +/* + * $HEADER$ + */ +/** @file: + * + * The Open MPI general purpose registry - implementation. 
+ * + */ + +/* + * includes + */ + +#include "ompi_config.h" + +#include +#include +#include +#include +#include + +#include "include/constants.h" + +#include "threads/mutex.h" + +#include "util/output.h" +#include "util/proc_info.h" +#include "util/sys_info.h" + +#include "mca/gpr/base/base.h" +#include "gpr_proxy.h" + +void mca_gpr_proxy_dump(int output_id) +{ + mca_gpr_cmd_flag_t command; + ompi_buffer_t cmd; + ompi_buffer_t answer; + int recv_tag=MCA_OOB_TAG_GPR; + + if (mca_gpr_proxy_compound_cmd_mode) { + mca_gpr_base_pack_dump(mca_gpr_proxy_compound_cmd); + return; + } + + + if (OMPI_SUCCESS != ompi_buffer_init(&cmd, 0)) { /* got a problem */ + return; + } + + if (OMPI_SUCCESS != mca_gpr_base_pack_dump(cmd)) { + return; + } + + if (0 > mca_oob_send_packed(mca_gpr_my_replica, cmd, MCA_OOB_TAG_GPR, 0)) { + return; + } + + + if (0 > mca_oob_recv_packed(mca_gpr_my_replica, &answer, &recv_tag)) { + return; + } + + if ((OMPI_SUCCESS != ompi_unpack(answer, &command, 1, MCA_GPR_OOB_PACK_CMD)) || + (MCA_GPR_DUMP_CMD != command)) { + return; + } + + mca_gpr_base_print_dump(answer, output_id); + ompi_buffer_free(answer); + return; +} diff --git a/src/mca/gpr/proxy/gpr_proxy_internals.c b/src/mca/gpr/proxy/gpr_proxy_internals.c new file mode 100644 index 0000000000..26ace8a07a --- /dev/null +++ b/src/mca/gpr/proxy/gpr_proxy_internals.c @@ -0,0 +1,138 @@ +/* + * $HEADER$ + */ +/** @file: + * + * The Open MPI general purpose registry - implementation. 
+ * + */ + +/* + * includes + */ + +#include "ompi_config.h" + +#include "gpr_proxy.h" + +ompi_registry_notify_id_t +mca_gpr_proxy_enter_notify_request(char *segment, + ompi_registry_notify_action_t action, + ompi_registry_notify_cb_fn_t cb_func, + void *user_tag) +{ + mca_gpr_proxy_notify_request_tracker_t *trackptr; + mca_gpr_idtag_list_t *ptr_free_id; + + trackptr = OBJ_NEW(mca_gpr_proxy_notify_request_tracker_t); + trackptr->segment = strdup(segment); + trackptr->action = action; + trackptr->callback = cb_func; + trackptr->user_tag = user_tag; + trackptr->remote_idtag = OMPI_REGISTRY_NOTIFY_ID_MAX; + if (ompi_list_is_empty(&mca_gpr_proxy_free_notify_id_tags)) { + trackptr->local_idtag = mca_gpr_proxy_last_notify_id_tag; + mca_gpr_proxy_last_notify_id_tag++; + } else { + ptr_free_id = (mca_gpr_idtag_list_t*)ompi_list_remove_first(&mca_gpr_proxy_free_notify_id_tags); + trackptr->local_idtag = ptr_free_id->id_tag; + } + ompi_list_append(&mca_gpr_proxy_notify_request_tracker, &trackptr->item); + + return trackptr->local_idtag; +} + + +ompi_registry_notify_id_t +mca_gpr_proxy_remove_notify_request(ompi_registry_notify_id_t local_idtag) +{ + mca_gpr_proxy_notify_request_tracker_t *trackptr; + mca_gpr_idtag_list_t *ptr_free_id; + ompi_registry_notify_id_t remote_idtag; + + /* locate corresponding entry on proxy tracker list and remove it */ + for (trackptr = (mca_gpr_proxy_notify_request_tracker_t*)ompi_list_get_first(&mca_gpr_proxy_notify_request_tracker); + trackptr != (mca_gpr_proxy_notify_request_tracker_t*)ompi_list_get_end(&mca_gpr_proxy_notify_request_tracker) && + trackptr->local_idtag != local_idtag; + trackptr = (mca_gpr_proxy_notify_request_tracker_t*)ompi_list_get_next(trackptr)); + + if (trackptr == (mca_gpr_proxy_notify_request_tracker_t*)ompi_list_get_end(&mca_gpr_proxy_notify_request_tracker)) { + return OMPI_REGISTRY_NOTIFY_ID_MAX; + } + + remote_idtag = trackptr->remote_idtag; + ompi_list_remove_item(&mca_gpr_proxy_notify_request_tracker, 
&trackptr->item); + + /* put id tag on free list */ + ptr_free_id = OBJ_NEW(mca_gpr_idtag_list_t); + ptr_free_id->id_tag = trackptr->local_idtag; + ompi_list_append(&mca_gpr_proxy_free_notify_id_tags, &ptr_free_id->item); + + /* release tracker item */ + OBJ_RELEASE(trackptr); + + return remote_idtag; +} + + +int mca_gpr_proxy_set_remote_idtag(ompi_registry_notify_id_t local_idtag, + ompi_registry_notify_id_t remote_idtag) +{ + mca_gpr_proxy_notify_request_tracker_t *trackptr; + + /* locate corresponding entry on proxy tracker list */ + for (trackptr = (mca_gpr_proxy_notify_request_tracker_t*)ompi_list_get_first(&mca_gpr_proxy_notify_request_tracker); + trackptr != (mca_gpr_proxy_notify_request_tracker_t*)ompi_list_get_end(&mca_gpr_proxy_notify_request_tracker) && + trackptr->local_idtag != local_idtag; + trackptr = (mca_gpr_proxy_notify_request_tracker_t*)ompi_list_get_next(trackptr)); + + if (trackptr == (mca_gpr_proxy_notify_request_tracker_t*)ompi_list_get_end(&mca_gpr_proxy_notify_request_tracker)) { + return OMPI_ERROR; + } + + trackptr->remote_idtag = remote_idtag; + return OMPI_SUCCESS; +} + + +ompi_list_t* mca_gpr_proxy_test_internals(int level) +{ + ompi_list_t *test_results=NULL; + ompi_buffer_t cmd, answer; + int recv_tag; + + test_results = OBJ_NEW(ompi_list_t); + + if (mca_gpr_proxy_compound_cmd_mode) { + mca_gpr_base_pack_test_internals(mca_gpr_proxy_compound_cmd, level); + return test_results; + } + + + if (OMPI_SUCCESS != ompi_buffer_init(&cmd, 0)) { /* got a problem */ + return test_results; + } + + if (OMPI_SUCCESS != mca_gpr_base_pack_test_internals(cmd, level)) { + goto CLEANUP; + } + + if (0 > mca_oob_send_packed(mca_gpr_my_replica, cmd, MCA_OOB_TAG_GPR, 0)) { + goto CLEANUP; + } + + + if (0 > mca_oob_recv_packed(mca_gpr_my_replica, &answer, &recv_tag)) { + goto CLEANUP; + } + + if (OMPI_SUCCESS != mca_gpr_base_unpack_test_internals(answer, test_results)) { + /* clear any partial results from the list */ + } + + ompi_buffer_free(answer); + + 
CLEANUP: + ompi_buffer_free(cmd); + return test_results; +} diff --git a/src/mca/gpr/proxy/gpr_proxy_messaging.c b/src/mca/gpr/proxy/gpr_proxy_messaging.c new file mode 100644 index 0000000000..60725961d0 --- /dev/null +++ b/src/mca/gpr/proxy/gpr_proxy_messaging.c @@ -0,0 +1,52 @@ +/* -*- C -*- + * + * $HEADER$ + * + */ +/** @file + */ + +#include "ompi_config.h" + +#include "gpr_proxy.h" + + +void mca_gpr_proxy_deliver_notify_msg(ompi_registry_notify_action_t state, + ompi_registry_notify_message_t *message) +{ + int namelen; + mca_gpr_proxy_notify_request_tracker_t *trackptr; + + if (mca_gpr_proxy_debug) { + if (OMPI_REGISTRY_NOTIFY_ON_STARTUP == state) { + ompi_output(0, "[%d,%d,%d] special delivery of startup msg", + OMPI_NAME_ARGS(*ompi_rte_get_self())); + } else { + ompi_output(0, "[%d,%d,%d] special delivery of shutdown msg", + OMPI_NAME_ARGS(*ompi_rte_get_self())); + } + } + + /* protect system from threadlock */ + if ((OMPI_REGISTRY_NOTIFY_ON_STARTUP & state) || + (OMPI_REGISTRY_NOTIFY_ON_SHUTDOWN & state)) { + + OMPI_THREAD_LOCK(&mca_gpr_proxy_mutex); + + namelen = strlen(message->segment); + + /* find the request corresponding to this notify */ + for (trackptr = (mca_gpr_proxy_notify_request_tracker_t*)ompi_list_get_first(&mca_gpr_proxy_notify_request_tracker); + trackptr != (mca_gpr_proxy_notify_request_tracker_t*)ompi_list_get_end(&mca_gpr_proxy_notify_request_tracker); + trackptr = (mca_gpr_proxy_notify_request_tracker_t*)ompi_list_get_next(trackptr)) { + if ((trackptr->action & state) && + (0 == strcmp(message->segment, trackptr->segment))) { + OMPI_THREAD_UNLOCK(&mca_gpr_proxy_mutex); + /* process request - callback function responsible for releasing memory */ + trackptr->callback(message, trackptr->user_tag); + return; + } + } + } + OBJ_RELEASE(message); +} diff --git a/src/mca/gpr/proxy/gpr_proxy_mode_ops.c b/src/mca/gpr/proxy/gpr_proxy_mode_ops.c new file mode 100644 index 0000000000..53ed6972af --- /dev/null +++ 
b/src/mca/gpr/proxy/gpr_proxy_mode_ops.c @@ -0,0 +1,215 @@ +/* -*- C -*- + * + * $HEADER$ + */ +/** @file: + * + * The Open MPI General Purpose Registry - Replica component + * + */ + +/* + * includes + */ +#include "ompi_config.h" + +#include "gpr_proxy.h" + + +void mca_gpr_proxy_silent_mode_on(void) +{ + OMPI_THREAD_LOCK(&mca_gpr_proxy_mutex); + mca_gpr_proxy_silent_mode = true; + OMPI_THREAD_UNLOCK(&mca_gpr_proxy_mutex); +} + +void mca_gpr_proxy_silent_mode_off(void) +{ + OMPI_THREAD_LOCK(&mca_gpr_proxy_mutex); + mca_gpr_proxy_silent_mode = false; + OMPI_THREAD_UNLOCK(&mca_gpr_proxy_mutex); +} + +void mca_gpr_proxy_notify_off(ompi_registry_notify_id_t sub_number) +{ + ompi_buffer_t cmd; + + if (mca_gpr_proxy_debug) { + ompi_output(0, "gpr_proxy_notify_off entered for sub_number %X", sub_number); + } + + if (mca_gpr_proxy_compound_cmd_mode) { + mca_gpr_base_pack_notify_off(mca_gpr_proxy_compound_cmd, + ompi_rte_get_self(), sub_number); + return; + } + + if (OMPI_SUCCESS != ompi_buffer_init(&cmd, 0)) { + return; + } + if (OMPI_SUCCESS != mca_gpr_base_pack_notify_off(cmd, + ompi_rte_get_self(), sub_number)) { + goto CLEANUP; + } + + if (0 > mca_oob_send_packed(mca_gpr_my_replica, cmd, MCA_OOB_TAG_GPR, 0)) { + goto CLEANUP; + } + + CLEANUP: + ompi_buffer_free(cmd); + return; +} + +void mca_gpr_proxy_notify_on(ompi_registry_notify_id_t sub_number) +{ + ompi_buffer_t cmd; + + if (mca_gpr_proxy_debug) { + ompi_output(0, "gpr_proxy_notify_on entered for sub_number %X", sub_number); + } + + if (mca_gpr_proxy_compound_cmd_mode) { + mca_gpr_base_pack_notify_on(mca_gpr_proxy_compound_cmd, + ompi_rte_get_self(), sub_number); + return; + } + + if (OMPI_SUCCESS != ompi_buffer_init(&cmd, 0)) { + return; + } + if (OMPI_SUCCESS != mca_gpr_base_pack_notify_on(cmd, + ompi_rte_get_self(), sub_number)) { + goto CLEANUP; + } + + if (0 > mca_oob_send_packed(mca_gpr_my_replica, cmd, MCA_OOB_TAG_GPR, 0)) { + goto CLEANUP; + } + + CLEANUP: + ompi_buffer_free(cmd); + return; +} + +void 
/*
 * Tell the replica that triggers for the given job are now active.
 *
 * Compound mode only queues the command.  Otherwise the command is packed
 * and sent to mca_gpr_my_replica; no reply is awaited and errors are not
 * reported to the caller.
 *
 * @param jobid  Job whose triggers are being activated.
 */
void mca_gpr_proxy_triggers_active(mca_ns_base_jobid_t jobid)
{
    ompi_buffer_t cmd;

    if (mca_gpr_proxy_compound_cmd_mode) {
        mca_gpr_base_pack_triggers_active_cmd(mca_gpr_proxy_compound_cmd, jobid);
        return;
    }

    if (OMPI_SUCCESS != ompi_buffer_init(&cmd, 0)) { /* got a problem */
        return;
    }

    if (OMPI_SUCCESS == mca_gpr_base_pack_triggers_active_cmd(cmd, jobid)) {
        (void) mca_oob_send_packed(mca_gpr_my_replica, cmd, MCA_OOB_TAG_GPR, 0);
    }

    /* reached on success and on failure alike */
    if (mca_gpr_proxy_debug) {
        ompi_output(0, "[%d,%d,%d] gpr_proxy_triggers_active: cleanup\n",
                    OMPI_NAME_ARGS(*ompi_rte_get_self()));
    }
    ompi_buffer_free(cmd);
}

/*
 * Tell the replica that triggers for the given job are now inactive.
 *
 * Same flow as mca_gpr_proxy_triggers_active(), using the "inactive"
 * command packer.
 *
 * @param jobid  Job whose triggers are being deactivated.
 */
void mca_gpr_proxy_triggers_inactive(mca_ns_base_jobid_t jobid)
{
    ompi_buffer_t cmd;

    if (mca_gpr_proxy_compound_cmd_mode) {
        mca_gpr_base_pack_triggers_inactive_cmd(mca_gpr_proxy_compound_cmd, jobid);
        return;
    }

    if (OMPI_SUCCESS != ompi_buffer_init(&cmd, 0)) { /* got a problem */
        return;
    }

    if (OMPI_SUCCESS == mca_gpr_base_pack_triggers_inactive_cmd(cmd, jobid)) {
        (void) mca_oob_send_packed(mca_gpr_my_replica, cmd, MCA_OOB_TAG_GPR, 0);
    }

    /* reached on success and on failure alike; the message now names this
     * function instead of the copy-pasted "triggers_active" */
    if (mca_gpr_proxy_debug) {
        ompi_output(0, "[%d,%d,%d] gpr_proxy_triggers_inactive: cleanup\n",
                    OMPI_NAME_ARGS(*ompi_rte_get_self()));
    }
    ompi_buffer_free(cmd);
}
OMPI_ERROR; + } + if (OMPI_SUCCESS != mca_gpr_base_pack_assume_ownership(cmd, mca_gpr_proxy_silent_mode, + jobid, segment)) { + goto CLEANUP; + } + + if (0 > mca_oob_send_packed(mca_gpr_my_replica, cmd, MCA_OOB_TAG_GPR, 0)) { + goto CLEANUP; + } + + + if (mca_gpr_proxy_silent_mode) { + return OMPI_SUCCESS; + } else { + if (0 > mca_oob_recv_packed(mca_gpr_my_replica, &answer, &recv_tag)) { + goto CLEANUP; + } + + if ((OMPI_SUCCESS != ompi_unpack(answer, &command, 1, MCA_GPR_OOB_PACK_CMD)) + || (MCA_GPR_ASSUME_OWNERSHIP_CMD != command)) { + ompi_buffer_free(answer); + goto CLEANUP; + } + + if (OMPI_SUCCESS != ompi_unpack(answer, &response, 1, OMPI_INT32)) { + ompi_buffer_free(answer); + goto CLEANUP; + } else { + ompi_buffer_free(cmd); + ompi_buffer_free(answer); + return (int)response; + } + } + + CLEANUP: + ompi_buffer_free(cmd); + return OMPI_ERROR; +} diff --git a/src/mca/gpr/proxy/gpr_proxy_put_get.c b/src/mca/gpr/proxy/gpr_proxy_put_get.c new file mode 100644 index 0000000000..848ec154fe --- /dev/null +++ b/src/mca/gpr/proxy/gpr_proxy_put_get.c @@ -0,0 +1,128 @@ +/* + * $HEADER$ + */ +/** @file: + * + * The Open MPI general purpose registry - implementation. 
+ * + */ + +/* + * includes + */ + +#include "ompi_config.h" + +#include "gpr_proxy.h" + +int mca_gpr_proxy_put(ompi_registry_mode_t mode, char *segment, + char **tokens, ompi_registry_object_t object, + ompi_registry_object_size_t size) +{ + ompi_buffer_t cmd; + ompi_buffer_t answer; + int recv_tag=MCA_OOB_TAG_GPR; + int response; + + if (mca_gpr_proxy_debug) { + ompi_output(0, "gpr_proxy_put: entered for segment %s 1st token %s", segment, *tokens); + } + + if (mca_gpr_proxy_compound_cmd_mode) { + return mca_gpr_base_pack_put(mca_gpr_proxy_compound_cmd, mca_gpr_proxy_silent_mode, + mode, segment, tokens, object, size); + } + + + if (OMPI_SUCCESS != ompi_buffer_init(&cmd, 0)) { /* got a problem */ + return OMPI_ERROR; + } + + response = OMPI_ERROR; + + if (OMPI_SUCCESS != mca_gpr_base_pack_put(cmd, mca_gpr_proxy_silent_mode, + mode, segment, tokens, object, size)) { + goto CLEANUP; + } + + if (mca_gpr_proxy_debug) { + ompi_output(0, "[%d,%d,%d] gpr_proxy_put: initiating send", + OMPI_NAME_ARGS(*ompi_rte_get_self())); + if (NULL == mca_gpr_my_replica) { + ompi_output(0, "\tBAD REPLICA"); + } + } + + if (0 > mca_oob_send_packed(mca_gpr_my_replica, cmd, MCA_OOB_TAG_GPR, 0)) { + if (mca_gpr_proxy_debug) { + ompi_output(0, "gpr_proxy_put: send failed"); + } + goto CLEANUP; + } + + if (mca_gpr_proxy_debug) { + ompi_output(0, "[%d,%d,%d] gpr_proxy_put: send complete", OMPI_NAME_ARGS(*ompi_rte_get_self())); + } + + if (mca_gpr_proxy_silent_mode) { + ompi_buffer_free(cmd); + return OMPI_SUCCESS; + } + + if (0 > mca_oob_recv_packed(mca_gpr_my_replica, &answer, &recv_tag)) { + goto CLEANUP; + } + + response = mca_gpr_base_unpack_put(answer); + ompi_buffer_free(answer); + +CLEANUP: + ompi_buffer_free(cmd); + return response; +} + + +ompi_list_t* mca_gpr_proxy_get(ompi_registry_mode_t mode, char *segment, char **tokens) +{ + ompi_buffer_t cmd; + ompi_buffer_t answer; + int recv_tag=MCA_OOB_TAG_GPR; + ompi_list_t *returned_list; + + returned_list = OBJ_NEW(ompi_list_t); + + /* 
need to protect against errors */ + if (NULL == segment || NULL == tokens || NULL == *tokens) { + return returned_list; + } + + if (mca_gpr_proxy_compound_cmd_mode) { + mca_gpr_base_pack_get(mca_gpr_proxy_compound_cmd, mode, segment, tokens); + return returned_list; + } + + + if (OMPI_SUCCESS != ompi_buffer_init(&cmd, 0)) { /* got a problem */ + return returned_list; + } + + if (OMPI_SUCCESS != mca_gpr_base_pack_get(cmd, mode, segment, tokens)) { + goto CLEANUP; + } + + if (0 > mca_oob_send_packed(mca_gpr_my_replica, cmd, MCA_OOB_TAG_GPR, 0)) { + goto CLEANUP; + } + + + if (0 > mca_oob_recv_packed(mca_gpr_my_replica, &answer, &recv_tag)) { + goto CLEANUP; + } + + mca_gpr_base_unpack_get(answer, returned_list); + ompi_buffer_free(answer); + + CLEANUP: + ompi_buffer_free(cmd); + return returned_list; +} diff --git a/src/mca/gpr/proxy/gpr_proxy_subscribe.c b/src/mca/gpr/proxy/gpr_proxy_subscribe.c new file mode 100644 index 0000000000..123e9f8905 --- /dev/null +++ b/src/mca/gpr/proxy/gpr_proxy_subscribe.c @@ -0,0 +1,166 @@ +/* + * $HEADER$ + */ +/** @file: + * + * The Open MPI general purpose registry - implementation. 
+ * + */ + +/* + * includes + */ + +#include "ompi_config.h" + +#include "gpr_proxy.h" + +ompi_registry_notify_id_t +mca_gpr_proxy_subscribe(ompi_registry_mode_t mode, + ompi_registry_notify_action_t action, + char *segment, char **tokens, + ompi_registry_notify_cb_fn_t cb_func, void *user_tag) +{ + ompi_buffer_t cmd; + ompi_buffer_t answer; + int recv_tag=MCA_OOB_TAG_GPR; + ompi_registry_notify_id_t idtag, response, remote_idtag; + + /* need to protect against errors */ + if (NULL == segment) { + return OMPI_REGISTRY_NOTIFY_ID_MAX; + } + + if (mca_gpr_proxy_compound_cmd_mode) { + if (OMPI_SUCCESS != mca_gpr_base_pack_subscribe(mca_gpr_proxy_compound_cmd, + mode, action, segment, tokens)) { + return OMPI_REGISTRY_NOTIFY_ID_MAX; + } + + OMPI_THREAD_LOCK(&mca_gpr_proxy_mutex); + + /* store callback function and user_tag in local list for lookup */ + /* generate id_tag to send to replica to identify lookup entry */ + idtag = mca_gpr_proxy_enter_notify_request(segment, action, cb_func, user_tag); + + OMPI_THREAD_UNLOCK(&mca_gpr_proxy_mutex); + + if (OMPI_SUCCESS != ompi_pack(mca_gpr_proxy_compound_cmd, &idtag, 1, MCA_GPR_OOB_PACK_NOTIFY_ID)) { + return OMPI_REGISTRY_NOTIFY_ID_MAX; + } + return idtag; + } + + + if (OMPI_SUCCESS != ompi_buffer_init(&cmd, 0)) { /* got a problem */ + return OMPI_REGISTRY_NOTIFY_ID_MAX; + } + + response = OMPI_REGISTRY_NOTIFY_ID_MAX; + + if (OMPI_SUCCESS != mca_gpr_base_pack_subscribe(cmd, mode, action, segment, tokens)) { + goto CLEANUP; + } + + OMPI_THREAD_LOCK(&mca_gpr_proxy_mutex); + + /* store callback function and user_tag in local list for lookup */ + /* generate id_tag to send to replica to identify lookup entry */ + idtag = mca_gpr_proxy_enter_notify_request(segment, action, cb_func, user_tag); + + OMPI_THREAD_UNLOCK(&mca_gpr_proxy_mutex); + + if (OMPI_SUCCESS != ompi_pack(cmd, &idtag, 1, MCA_GPR_OOB_PACK_NOTIFY_ID)) { + goto CLEANUP; + } + + if (0 > mca_oob_send_packed(mca_gpr_my_replica, cmd, MCA_OOB_TAG_GPR, 0)) { + goto 
CLEANUP; + } + + if (0 > mca_oob_recv_packed(mca_gpr_my_replica, &answer, &recv_tag)) { + goto CLEANUP; + } + + if (OMPI_SUCCESS != mca_gpr_base_unpack_subscribe(answer, &remote_idtag)) { + if (mca_gpr_proxy_debug) { + ompi_output(0, "proxy_subscribe: unable to unpack"); + } + OMPI_THREAD_LOCK(&mca_gpr_proxy_mutex); + mca_gpr_proxy_remove_notify_request(idtag); + OMPI_THREAD_UNLOCK(&mca_gpr_proxy_mutex); + + response = OMPI_REGISTRY_NOTIFY_ID_MAX; + + } else { + response = remote_idtag; + mca_gpr_proxy_set_remote_idtag(idtag, remote_idtag); + } + + ompi_buffer_free(answer); + + CLEANUP: + ompi_buffer_free(cmd); + return response; +} + + +int mca_gpr_proxy_unsubscribe(ompi_registry_notify_id_t sub_number) +{ + ompi_buffer_t cmd; + ompi_buffer_t answer; + int recv_tag=MCA_OOB_TAG_GPR; + int response; + ompi_registry_notify_id_t remote_idtag; + + OMPI_THREAD_LOCK(&mca_gpr_proxy_mutex); + remote_idtag = mca_gpr_proxy_remove_notify_request(sub_number); + if (OMPI_REGISTRY_NOTIFY_ID_MAX == remote_idtag) { + return OMPI_ERROR; + } + response = mca_gpr_base_pack_unsubscribe(mca_gpr_proxy_compound_cmd, + mca_gpr_proxy_silent_mode, + remote_idtag); + OMPI_THREAD_UNLOCK(&mca_gpr_proxy_mutex); + + if (mca_gpr_proxy_compound_cmd_mode || OMPI_SUCCESS != response) { + return response; + } + + + if (OMPI_SUCCESS != ompi_buffer_init(&cmd, 0)) { /* got a problem */ + return OMPI_ERROR; + } + + + if (OMPI_SUCCESS != mca_gpr_base_pack_unsubscribe(cmd, mca_gpr_proxy_silent_mode, + remote_idtag)) { + goto CLEANUP; + } + + if (0 > mca_oob_send_packed(mca_gpr_my_replica, cmd, MCA_OOB_TAG_GPR, 0)) { + goto CLEANUP; + } + + if (mca_gpr_proxy_silent_mode) { + goto COMPLETE; + } + + if (0 > mca_oob_recv_packed(mca_gpr_my_replica, &answer, &recv_tag)) { + goto CLEANUP; + } + + if (OMPI_SUCCESS != mca_gpr_base_unpack_unsubscribe(answer)) { /* got an error on replica */ + ompi_buffer_free(answer); + goto CLEANUP; + } + + COMPLETE: + ompi_buffer_free(answer); + ompi_buffer_free(cmd); + return 
OMPI_SUCCESS; + + CLEANUP: + ompi_buffer_free(cmd); + return OMPI_ERROR; +} diff --git a/src/mca/gpr/proxy/gpr_proxy_synchro.c b/src/mca/gpr/proxy/gpr_proxy_synchro.c new file mode 100644 index 0000000000..bf65aa5bae --- /dev/null +++ b/src/mca/gpr/proxy/gpr_proxy_synchro.c @@ -0,0 +1,161 @@ +/* + * $HEADER$ + */ +/** @file: + * + * The Open MPI general purpose registry - implementation. + * + */ + +/* + * includes + */ + +#include "ompi_config.h" + +#include "gpr_proxy.h" + +ompi_registry_notify_id_t +mca_gpr_proxy_synchro(ompi_registry_synchro_mode_t synchro_mode, + ompi_registry_mode_t mode, + char *segment, char **tokens, int trigger, + ompi_registry_notify_cb_fn_t cb_func, void *user_tag) +{ + ompi_buffer_t cmd; + ompi_buffer_t answer; + int recv_tag=MCA_OOB_TAG_GPR; + ompi_registry_notify_id_t idtag, remote_idtag, response; + + + if (mca_gpr_proxy_compound_cmd_mode) { + if (OMPI_SUCCESS != mca_gpr_base_pack_synchro(mca_gpr_proxy_compound_cmd, + synchro_mode, + mode, segment, tokens, trigger)) { + return OMPI_REGISTRY_NOTIFY_ID_MAX; + } + + OMPI_THREAD_LOCK(&mca_gpr_proxy_mutex); + /* store callback function and user_tag in local list for lookup */ + /* generate id_tag to send to replica to identify lookup entry */ + + idtag = mca_gpr_proxy_enter_notify_request(segment, OMPI_REGISTRY_NOTIFY_NONE, cb_func, user_tag); + + OMPI_THREAD_UNLOCK(&mca_gpr_proxy_mutex); + + if (OMPI_SUCCESS != ompi_pack(mca_gpr_proxy_compound_cmd, &idtag, 1, OMPI_INT32)) { + mca_gpr_proxy_remove_notify_request(idtag); + return OMPI_REGISTRY_NOTIFY_ID_MAX; + } + + return idtag; + } + + + if (OMPI_SUCCESS != ompi_buffer_init(&cmd, 0)) { /* got a problem */ + return OMPI_REGISTRY_NOTIFY_ID_MAX; + } + + response = OMPI_REGISTRY_NOTIFY_ID_MAX; + + if (OMPI_SUCCESS != mca_gpr_base_pack_synchro(cmd, synchro_mode, mode, segment, tokens, trigger)) { + goto CLEANUP; + } + + OMPI_THREAD_LOCK(&mca_gpr_proxy_mutex); + /* store callback function and user_tag in local list for lookup */ + /* generate 
id_tag to send to replica to identify lookup entry */ + + idtag = mca_gpr_proxy_enter_notify_request(segment, OMPI_REGISTRY_NOTIFY_NONE, cb_func, user_tag); + + OMPI_THREAD_UNLOCK(&mca_gpr_proxy_mutex); + + if (OMPI_SUCCESS != ompi_pack(cmd, &idtag, 1, OMPI_INT32)) { + goto CLEANUP; + } + + if (0 > mca_oob_send_packed(mca_gpr_my_replica, cmd, MCA_OOB_TAG_GPR, 0)) { + goto CLEANUP; + } + + if (0 > mca_oob_recv_packed(mca_gpr_my_replica, &answer, &recv_tag)) { + goto CLEANUP; + } + + if (OMPI_SUCCESS != mca_gpr_base_unpack_synchro(answer, &remote_idtag)) { /* error on replica */ + + OMPI_THREAD_LOCK(&mca_gpr_proxy_mutex); + mca_gpr_proxy_remove_notify_request(idtag); + OMPI_THREAD_UNLOCK(&mca_gpr_proxy_mutex); + + response = OMPI_REGISTRY_NOTIFY_ID_MAX; + + } else { + response = idtag; + mca_gpr_proxy_set_remote_idtag(idtag, remote_idtag); + } + + ompi_buffer_free(answer); + + CLEANUP: + ompi_buffer_free(cmd); + return response; + +} + +int mca_gpr_proxy_cancel_synchro(ompi_registry_notify_id_t synch_number) +{ + ompi_buffer_t cmd; + ompi_buffer_t answer; + int recv_tag=MCA_OOB_TAG_GPR, response; + ompi_registry_notify_id_t remote_idtag; + + OMPI_THREAD_LOCK(&mca_gpr_proxy_mutex); + remote_idtag = mca_gpr_proxy_remove_notify_request(synch_number); + if (OMPI_REGISTRY_NOTIFY_ID_MAX == remote_idtag) { + return OMPI_ERROR; + } + response = mca_gpr_base_pack_cancel_synchro(mca_gpr_proxy_compound_cmd, + mca_gpr_proxy_silent_mode, + remote_idtag); + OMPI_THREAD_UNLOCK(&mca_gpr_proxy_mutex); + + if (mca_gpr_proxy_compound_cmd_mode || OMPI_SUCCESS != response) { + return response; + } + + + if (OMPI_SUCCESS != ompi_buffer_init(&cmd, 0)) { /* got a problem */ + return OMPI_ERROR; + } + + if (OMPI_SUCCESS != mca_gpr_base_pack_cancel_synchro(cmd, mca_gpr_proxy_silent_mode, + remote_idtag)) { + goto CLEANUP; + } + + if (0 > mca_oob_send_packed(mca_gpr_my_replica, cmd, MCA_OOB_TAG_GPR, 0)) { + goto CLEANUP; + } + + if (mca_gpr_proxy_silent_mode) { + goto COMPLETE; + } + + if (0 > 
mca_oob_recv_packed(mca_gpr_my_replica, &answer, &recv_tag)) { + goto CLEANUP; + } + + if (OMPI_SUCCESS != mca_gpr_base_unpack_cancel_synchro(answer)) { /* got an error on replica */ + ompi_buffer_free(answer); + goto CLEANUP; + } + + COMPLETE: + ompi_buffer_free(answer); + ompi_buffer_free(cmd); + return OMPI_SUCCESS; + + CLEANUP: + ompi_buffer_free(cmd); + return OMPI_ERROR; +} diff --git a/src/mca/gpr/proxy/gpr_proxy_xmit_alerts.c b/src/mca/gpr/proxy/gpr_proxy_xmit_alerts.c new file mode 100644 index 0000000000..2d99cece6c --- /dev/null +++ b/src/mca/gpr/proxy/gpr_proxy_xmit_alerts.c @@ -0,0 +1,91 @@ +/* -*- C -*- + * + * $HEADER$ + */ +/** @file: + * + * The Open MPI General Purpose Registry - proxy component + * + */ + +/* + * includes + */ +#include "ompi_config.h" + +#include "gpr_proxy.h" + +ompi_buffer_t mca_gpr_proxy_get_startup_msg(mca_ns_base_jobid_t jobid, + ompi_list_t *recipients) +{ + ompi_buffer_t msg, cmd, answer; + int recv_tag=MCA_OOB_TAG_GPR; + + if (mca_gpr_proxy_compound_cmd_mode) { + mca_gpr_base_pack_get_startup_msg(mca_gpr_proxy_compound_cmd, jobid); + return NULL; + } + + if (OMPI_SUCCESS != ompi_buffer_init(&cmd, 0)) { /* got a problem */ + return NULL; + } + + msg = NULL; + + if (OMPI_SUCCESS != mca_gpr_base_pack_get_startup_msg(cmd, jobid)) { + goto CLEANUP; + } + + if (0 > mca_oob_send_packed(mca_gpr_my_replica, cmd, MCA_OOB_TAG_GPR, 0)) { + goto CLEANUP; + } + + if (0 > mca_oob_recv_packed(mca_gpr_my_replica, &answer, &recv_tag)) { + goto CLEANUP; + } + + msg = mca_gpr_base_unpack_get_startup_msg(answer, recipients); + ompi_buffer_free(answer); + + CLEANUP: + ompi_buffer_free(cmd); + return msg; +} + + +ompi_buffer_t mca_gpr_proxy_get_shutdown_msg(mca_ns_base_jobid_t jobid, + ompi_list_t *recipients) +{ + ompi_buffer_t msg, cmd, answer; + int recv_tag=MCA_OOB_TAG_GPR; + + if (mca_gpr_proxy_compound_cmd_mode) { + mca_gpr_base_pack_get_shutdown_msg(mca_gpr_proxy_compound_cmd, jobid); + return NULL; + } + + if (OMPI_SUCCESS != 
ompi_buffer_init(&cmd, 0)) { /* got a problem */ + return NULL; + } + + msg = NULL; + + if (OMPI_SUCCESS != mca_gpr_base_pack_get_shutdown_msg(cmd, jobid)) { + goto CLEANUP; + } + + if (0 > mca_oob_send_packed(mca_gpr_my_replica, cmd, MCA_OOB_TAG_GPR, 0)) { + goto CLEANUP; + } + + if (0 > mca_oob_recv_packed(mca_gpr_my_replica, &answer, &recv_tag)) { + goto CLEANUP; + } + + msg = mca_gpr_base_unpack_get_shutdown_msg(answer, recipients); + ompi_buffer_free(answer); + + CLEANUP: + ompi_buffer_free(cmd); + return msg; +} diff --git a/src/mca/gpr/replica/Makefile.am b/src/mca/gpr/replica/Makefile.am index fee78e60e8..9471ef4e84 100644 --- a/src/mca/gpr/replica/Makefile.am +++ b/src/mca/gpr/replica/Makefile.am @@ -8,9 +8,22 @@ include $(top_ompi_srcdir)/config/Makefile.options sources = \ gpr_replica_component.c \ + gpr_replica_recv_proxy_msgs.c \ + gpr_replica_xmit_alerts.c \ gpr_replica.h \ - gpr_replica.c \ - gpr_replica_internals.c \ + gpr_replica_del_index.c \ + gpr_replica_put_get.c \ + gpr_replica_dump.c \ + gpr_replica_cleanup.c \ + gpr_replica_subscribe.c \ + gpr_replica_compound_cmd.c \ + gpr_replica_synchro.c \ + gpr_replica_mode_ops.c \ + gpr_replica_messaging.c \ + gpr_replica_internals_dict_ops.c \ + gpr_replica_internals_segment_ops.c \ + gpr_replica_internals_trigger_ops.c \ + gpr_replica_test_internals.c \ gpr_replica_internals.h # Make the output library in this directory, and name it either diff --git a/src/mca/gpr/replica/gpr_replica.c b/src/mca/gpr/replica/gpr_replica.c deleted file mode 100644 index 70628e4f38..0000000000 --- a/src/mca/gpr/replica/gpr_replica.c +++ /dev/null @@ -1,867 +0,0 @@ -/* - * $HEADER$ - */ -/** @file: - * - * The Open MPI general purpose registry - implementation. 
- * - */ - -/* - * includes - */ - -#include "ompi_config.h" - -#include -#include -#include -#ifdef HAVE_UNISTD_H -#include -#endif -#ifdef HAVE_LIBGEN_H -#include -#endif - -#include "include/constants.h" - -#include "threads/mutex.h" - -#include "util/output.h" -#include "util/proc_info.h" -#include "util/sys_info.h" - -#include "mca/gpr/base/base.h" -#include "gpr_replica.h" -#include "gpr_replica_internals.h" -#include "runtime/runtime.h" - - -int gpr_replica_delete_segment(char *segment) -{ - int rc; - OMPI_THREAD_LOCK(&mca_gpr_replica_mutex); - rc = gpr_replica_delete_segment_nl(segment); - OMPI_THREAD_UNLOCK(&mca_gpr_replica_mutex); - gpr_replica_process_callbacks(); - return rc; -} - - -int gpr_replica_delete_segment_nl(char *segment) -{ - mca_gpr_replica_segment_t *seg; - - if (mca_gpr_replica_debug) { - ompi_output(0, "[%d,%d,%d] gpr replica: delete_segment entered", ompi_rte_get_self()->cellid, - ompi_rte_get_self()->jobid, ompi_rte_get_self()->vpid); - } - - seg = gpr_replica_find_seg(true, segment); - - if (NULL == seg) { /* couldn't locate segment */ - return OMPI_ERROR; - } - - OBJ_RELEASE(seg); - - if (OMPI_SUCCESS != gpr_replica_delete_key(segment, NULL)) { /* couldn't remove dictionary entry */ - return OMPI_ERROR; - } - return OMPI_SUCCESS; -} - -int gpr_replica_put(ompi_registry_mode_t addr_mode, char *segment, - char **tokens, ompi_registry_object_t object, - ompi_registry_object_size_t size) -{ - int rc; - OMPI_THREAD_LOCK(&mca_gpr_replica_mutex); - rc = gpr_replica_put_nl(addr_mode, segment, tokens, object, size); - OMPI_THREAD_UNLOCK(&mca_gpr_replica_mutex); - gpr_replica_process_callbacks(); - return rc; -} - -int gpr_replica_put_nl(ompi_registry_mode_t addr_mode, char *segment, - char **tokens, ompi_registry_object_t object, - ompi_registry_object_size_t size) -{ - ompi_list_t *keylist; - mca_gpr_replica_keytable_t *keyptr; - mca_gpr_replica_segment_t *seg; - mca_gpr_replica_core_t *entry_ptr; - ompi_registry_mode_t put_mode; - 
mca_gpr_replica_trigger_list_t *trig; - ompi_registry_notify_message_t *notify_msg; - int return_code, num_tokens; - mca_gpr_replica_key_t *keys, *key2; - bool still_valid; - - - if (mca_gpr_replica_debug) { - ompi_output(0, "[%d,%d,%d] gpr replica: put entered on segment %s 1st token %s", - ompi_rte_get_self()->cellid, ompi_rte_get_self()->jobid, - ompi_rte_get_self()->vpid, segment, *tokens); - } - - /* protect ourselves against errors */ - if (NULL == segment || NULL == object || 0 == size || NULL == tokens || NULL == *tokens) { - if (mca_gpr_replica_debug) { - ompi_output(0, "[%d,%d,%d] gpr replica: error in input - put rejected", - ompi_rte_get_self()->cellid, ompi_rte_get_self()->jobid, - ompi_rte_get_self()->vpid); - } - return OMPI_ERROR; - } - - /* ignore addressing mode - all tokens are used - * only overwrite permission mode flag has any affect - */ - put_mode = addr_mode & OMPI_REGISTRY_OVERWRITE; - - /* find the segment */ - seg = gpr_replica_find_seg(true, segment); - if (NULL == seg) { /* couldn't find segment or create it */ - return OMPI_ERROR; - } - - /* convert tokens to list of keys */ - keylist = gpr_replica_get_key_list(segment, tokens); - if (0 >= (num_tokens = ompi_list_get_size(keylist))) { - return OMPI_ERROR; - } - - keys = (mca_gpr_replica_key_t*)malloc(num_tokens*sizeof(mca_gpr_replica_key_t)); - key2 = keys; - - /* traverse the list to find undefined tokens - get new keys for them */ - for (keyptr = (mca_gpr_replica_keytable_t*)ompi_list_get_first(keylist); - keyptr != (mca_gpr_replica_keytable_t*)ompi_list_get_end(keylist); - keyptr = (mca_gpr_replica_keytable_t*)ompi_list_get_next(keyptr)) { - if (MCA_GPR_REPLICA_KEY_MAX == keyptr->key) { /* need to get new key */ - keyptr->key = gpr_replica_define_key(segment, keyptr->token); - } - *key2 = keyptr->key; - key2++; - } - - /* see if specified entry already exists */ - for (entry_ptr = (mca_gpr_replica_core_t*)ompi_list_get_first(&seg->registry_entries); - entry_ptr != 
(mca_gpr_replica_core_t*)ompi_list_get_end(&seg->registry_entries); - entry_ptr = (mca_gpr_replica_core_t*)ompi_list_get_next(entry_ptr)) { - if (gpr_replica_check_key_list(put_mode, num_tokens, keys, - entry_ptr->num_keys, entry_ptr->keys)) { - /* found existing entry - overwrite if mode set, else error */ - if (put_mode) { /* overwrite enabled */ - free(entry_ptr->object); - entry_ptr->object = NULL; - entry_ptr->object_size = size; - entry_ptr->object = (ompi_registry_object_t)malloc(size); - memcpy(entry_ptr->object, object, size); - return_code = OMPI_SUCCESS; - goto CLEANUP; - } else { - return_code = OMPI_ERROR; - goto CLEANUP; - } - } - } - - /* no existing entry - create new one */ - entry_ptr = OBJ_NEW(mca_gpr_replica_core_t); - entry_ptr->keys = (mca_gpr_replica_key_t*)malloc(num_tokens*sizeof(mca_gpr_replica_key_t)); - memcpy(entry_ptr->keys, keys, num_tokens*sizeof(mca_gpr_replica_key_t)); - entry_ptr->num_keys = num_tokens; - entry_ptr->object_size = size; - entry_ptr->object = (ompi_registry_object_t*)malloc(size); - memcpy(entry_ptr->object, object, size); - ompi_list_append(&seg->registry_entries, &entry_ptr->item); - - return_code = OMPI_SUCCESS; - - /* update trigger list and check for trigger conditions */ - for (trig = (mca_gpr_replica_trigger_list_t*)ompi_list_get_first(&seg->triggers); - trig != (mca_gpr_replica_trigger_list_t*)ompi_list_get_end(&seg->triggers); - ) { - mca_gpr_replica_trigger_list_t* next = (mca_gpr_replica_trigger_list_t*)ompi_list_get_next(trig); - if (gpr_replica_check_key_list(trig->addr_mode, trig->num_keys, trig->keys, - num_tokens, keys)) { - trig->count++; - } - still_valid = true; - if (((OMPI_REGISTRY_SYNCHRO_MODE_ASCENDING & trig->synch_mode) - && (trig->count >= trig->trigger) - && (MCA_GPR_REPLICA_TRIGGER_BELOW_LEVEL == trig->above_below)) || - (OMPI_REGISTRY_SYNCHRO_MODE_LEVEL & trig->synch_mode && trig->count == trig->trigger) || - (OMPI_REGISTRY_SYNCHRO_MODE_GT_EQUAL & trig->synch_mode && trig->count >= 
trig->trigger)) { - notify_msg = gpr_replica_construct_notify_message(addr_mode, segment, trig->tokens); - notify_msg->trig_action = OMPI_REGISTRY_NOTIFY_NONE; - notify_msg->trig_synchro = trig->synch_mode; - still_valid = gpr_replica_process_triggers(segment, trig, notify_msg); - } else if ((OMPI_REGISTRY_NOTIFY_ALL & trig->action) || - (OMPI_REGISTRY_NOTIFY_ADD_ENTRY & trig->action) || - (OMPI_REGISTRY_NOTIFY_MODIFICATION & trig->action && OMPI_REGISTRY_OVERWRITE & put_mode)) { - notify_msg = gpr_replica_construct_notify_message(addr_mode, segment, trig->tokens); - notify_msg->trig_action = trig->action; - notify_msg->trig_synchro = OMPI_REGISTRY_SYNCHRO_MODE_NONE; - still_valid = gpr_replica_process_triggers(segment, trig, notify_msg); - } - if (still_valid) { - if (trig->count > trig->trigger) { - trig->above_below = MCA_GPR_REPLICA_TRIGGER_ABOVE_LEVEL; - } else if (trig->count == trig->trigger) { - trig->above_below = MCA_GPR_REPLICA_TRIGGER_AT_LEVEL; - } - } - trig = next; - } - - CLEANUP: - /* release list of keys */ - if (NULL != keylist) { - OBJ_RELEASE(keylist); - } - if (mca_gpr_replica_debug) { - ompi_output(0, "[%d,%d,%d] gpr replica-put: complete", ompi_rte_get_self()->cellid, - ompi_rte_get_self()->jobid, ompi_rte_get_self()->vpid); - } - - return return_code; -} - -int gpr_replica_delete_object(ompi_registry_mode_t addr_mode, - char *segment, char **tokens) -{ - int rc; - OMPI_THREAD_LOCK(&mca_gpr_replica_mutex); - rc = gpr_replica_delete_object_nl(addr_mode, segment, tokens); - OMPI_THREAD_UNLOCK(&mca_gpr_replica_mutex); - gpr_replica_process_callbacks(); - return rc; -} - - -int gpr_replica_delete_object_nl(ompi_registry_mode_t addr_mode, - char *segment, char **tokens) -{ - mca_gpr_replica_core_t *reg, *prev; - mca_gpr_replica_keytable_t *keyptr; - ompi_list_t *keylist; - mca_gpr_replica_key_t *keys, *key2; - mca_gpr_replica_segment_t *seg; - int num_tokens, return_code; - mca_gpr_replica_trigger_list_t *trig; - ompi_registry_notify_message_t 
*notify_msg; - bool still_valid; - - if (mca_gpr_replica_debug) { - ompi_output(0, "[%d,%d,%d] gpr replica: delete_object entered: segment 1st token", - ompi_rte_get_self()->cellid, ompi_rte_get_self()->jobid, - ompi_rte_get_self()->vpid, segment, *tokens); - } - - keys = NULL; - return_code = OMPI_ERROR; - - /* protect against errors */ - if (NULL == segment) { - return OMPI_ERROR; - } - - - /* find the specified segment */ - seg = gpr_replica_find_seg(false, segment); - if (NULL == seg) { /* segment not found */ - return OMPI_ERROR; - } - - /* convert tokens to list of keys */ - keylist = gpr_replica_get_key_list(segment, tokens); - if (0 == (num_tokens = ompi_list_get_size(keylist))) { /* no tokens provided - wildcard case */ - keys = NULL; - - } else { /* tokens provided */ - keys = (mca_gpr_replica_key_t*)malloc(num_tokens*sizeof(mca_gpr_replica_key_t)); - key2 = keys; - - /* traverse the list to find undefined tokens - error if found */ - for (keyptr = (mca_gpr_replica_keytable_t*)ompi_list_get_first(keylist); - keyptr != (mca_gpr_replica_keytable_t*)ompi_list_get_end(keylist); - keyptr = (mca_gpr_replica_keytable_t*)ompi_list_get_next(keyptr)) { - if (MCA_GPR_REPLICA_KEY_MAX == keyptr->key) { /* unknown token */ - return_code = OMPI_ERROR; - goto CLEANUP; - } - *key2 = keyptr->key; - key2++; - } - } - - /* traverse the segment's registry, looking for matching tokens per the specified mode */ - for (reg = (mca_gpr_replica_core_t*)ompi_list_get_first(&seg->registry_entries); - reg != (mca_gpr_replica_core_t*)ompi_list_get_end(&seg->registry_entries); - reg = (mca_gpr_replica_core_t*)ompi_list_get_next(reg)) { - - /* for each registry entry, check the key list */ - if (gpr_replica_check_key_list(addr_mode, num_tokens, keys, - reg->num_keys, reg->keys)) { /* found the key(s) on the list */ - prev = (mca_gpr_replica_core_t*)ompi_list_get_prev(reg); - ompi_list_remove_item(&seg->registry_entries, ®->item); - reg = prev; - } - } - - return_code = OMPI_SUCCESS; - - 
/* update synchro list and check for trigger conditions */ - for (trig = (mca_gpr_replica_trigger_list_t*)ompi_list_get_first(&seg->triggers); - trig != (mca_gpr_replica_trigger_list_t*)ompi_list_get_end(&seg->triggers); - ) { - mca_gpr_replica_trigger_list_t* next = (mca_gpr_replica_trigger_list_t*)ompi_list_get_next(trig); - if (gpr_replica_check_key_list(trig->addr_mode, trig->num_keys, trig->keys, - num_tokens, keys)) { - trig->count--; - } - still_valid = true; - if (((OMPI_REGISTRY_SYNCHRO_MODE_DESCENDING & trig->synch_mode) - && (trig->count <= trig->trigger) - && (MCA_GPR_REPLICA_TRIGGER_ABOVE_LEVEL == trig->above_below)) || - (OMPI_REGISTRY_SYNCHRO_MODE_LEVEL & trig->synch_mode && trig->count == trig->trigger)) { - notify_msg = gpr_replica_construct_notify_message(addr_mode, segment, trig->tokens); - notify_msg->trig_action = OMPI_REGISTRY_NOTIFY_NONE; - notify_msg->trig_synchro = trig->synch_mode; - still_valid = gpr_replica_process_triggers(segment, trig, notify_msg); - } else if ((OMPI_REGISTRY_NOTIFY_ALL & trig->action) || - (OMPI_REGISTRY_NOTIFY_DELETE_ENTRY & trig->action)) { - notify_msg = gpr_replica_construct_notify_message(addr_mode, segment, trig->tokens); - notify_msg->trig_action = trig->action; - notify_msg->trig_synchro = OMPI_REGISTRY_SYNCHRO_MODE_NONE; - still_valid = gpr_replica_process_triggers(segment, trig, notify_msg); - } - if (still_valid) { - if (trig->count < trig->trigger) { - trig->above_below = MCA_GPR_REPLICA_TRIGGER_BELOW_LEVEL; - } else if (trig->count == trig->trigger) { - trig->above_below = MCA_GPR_REPLICA_TRIGGER_AT_LEVEL; - } - } - trig = next; - } - - - CLEANUP: - if (NULL != keylist) { - OBJ_RELEASE(keylist); - } - - if (NULL != keys) { - free(keys); - keys = NULL; - } - - return return_code; -} - -ompi_list_t* gpr_replica_index(char *segment) -{ - ompi_list_t* list; - OMPI_THREAD_LOCK(&mca_gpr_replica_mutex); - list = gpr_replica_index_nl(segment); - OMPI_THREAD_UNLOCK(&mca_gpr_replica_mutex); - return list; -} - 
-ompi_list_t* gpr_replica_index_nl(char *segment) -{ - ompi_list_t *answer; - mca_gpr_replica_keytable_t *ptr; - mca_gpr_replica_segment_t *seg; - ompi_registry_index_value_t *ans; - - if (mca_gpr_replica_debug) { - ompi_output(0, "[%d,%d,%d] gpr replica: index entered segment: %s", ompi_rte_get_self()->cellid, - ompi_rte_get_self()->jobid, ompi_rte_get_self()->vpid, segment); - } - - answer = OBJ_NEW(ompi_list_t); - - if (NULL == segment) { /* looking for index of global registry */ - for (ptr = (mca_gpr_replica_keytable_t*)ompi_list_get_first(&mca_gpr_replica_head.segment_dict); - ptr != (mca_gpr_replica_keytable_t*)ompi_list_get_end(&mca_gpr_replica_head.segment_dict); - ptr = (mca_gpr_replica_keytable_t*)ompi_list_get_next(ptr)) { - ans = OBJ_NEW(ompi_registry_index_value_t); - ans->token = strdup(ptr->token); - ompi_list_append(answer, &ans->item); - } - } else { /* want index of specific segment */ - /* find the specified segment */ - seg = gpr_replica_find_seg(false, segment); - if (NULL == seg) { /* segment not found */ - return answer; - } - /* got segment - now index that dictionary */ - for (ptr = (mca_gpr_replica_keytable_t*)ompi_list_get_first(&seg->keytable); - ptr != (mca_gpr_replica_keytable_t*)ompi_list_get_end(&seg->keytable); - ptr = (mca_gpr_replica_keytable_t*)ompi_list_get_next(ptr)) { - ans = OBJ_NEW(ompi_registry_index_value_t); - ans->token = strdup(ptr->token); - ompi_list_append(answer, &ans->item); - } - - } - return answer; -} - -int gpr_replica_subscribe(ompi_registry_mode_t addr_mode, - ompi_registry_notify_action_t action, - char *segment, char **tokens, - ompi_registry_notify_cb_fn_t cb_func, void *user_tag) -{ - int rc; - mca_gpr_notify_id_t local_idtag; - - OMPI_THREAD_LOCK(&mca_gpr_replica_mutex); - - /* enter request on notify tracking system */ - local_idtag = gpr_replica_enter_notify_request(NULL, 0, cb_func, user_tag); - - /* process subscription */ - rc = gpr_replica_subscribe_nl(addr_mode,action,segment,tokens,local_idtag); 
- - OMPI_THREAD_UNLOCK(&mca_gpr_replica_mutex); - - gpr_replica_process_callbacks(); - return rc; -} - - -int gpr_replica_subscribe_nl(ompi_registry_mode_t addr_mode, - ompi_registry_notify_action_t action, - char *segment, char **tokens, mca_gpr_notify_id_t id_tag) -{ - mca_gpr_replica_trigger_list_t *trig; - ompi_registry_notify_message_t *notify_msg; - - if (mca_gpr_replica_debug) { - ompi_output(0, "[%d,%d,%d] gpr replica: subscribe entered: segment %s 1st token %s", - ompi_rte_get_self()->cellid, ompi_rte_get_self()->jobid, - ompi_rte_get_self()->vpid, segment, tokens ? *tokens : ""); - } - - /* protect against errors */ - if (NULL == segment) { - return OMPI_ERROR; - } - - - /* construct the trigger - add to notify tracking system if success, otherwise dump */ - if (NULL != (trig = gpr_replica_construct_trigger(OMPI_REGISTRY_SYNCHRO_MODE_NONE, action, - addr_mode, segment, tokens, - 0, id_tag))) { - - if (OMPI_REGISTRY_NOTIFY_PRE_EXISTING & action) { /* want list of everything there */ - notify_msg = gpr_replica_construct_notify_message(addr_mode, segment, trig->tokens); - notify_msg->trig_action = action; - notify_msg->trig_synchro = OMPI_REGISTRY_SYNCHRO_MODE_NONE; - gpr_replica_process_triggers(segment, trig, notify_msg); - } - return OMPI_SUCCESS; - } else { - return OMPI_ERROR; - } -} - - -int gpr_replica_unsubscribe(ompi_registry_mode_t addr_mode, - ompi_registry_notify_action_t action, - char *segment, char **tokens) -{ - int rc; - OMPI_THREAD_LOCK(&mca_gpr_replica_mutex); - rc = gpr_replica_unsubscribe_nl(addr_mode,action,segment,tokens); - OMPI_THREAD_UNLOCK(&mca_gpr_replica_mutex); - - if (MCA_GPR_NOTIFY_ID_MAX == rc) { - return OMPI_ERROR; - } - - return OMPI_SUCCESS; -} - - -mca_gpr_notify_id_t gpr_replica_unsubscribe_nl(ompi_registry_mode_t addr_mode, - ompi_registry_notify_action_t action, - char *segment, char **tokens) -{ - mca_gpr_notify_id_t id_tag, req_idtag; - - if (mca_gpr_replica_debug) { - ompi_output(0, "[%d,%d,%d] gpr replica: 
unsubscribe entered: segment %s 1st token %s", - ompi_rte_get_self()->cellid, ompi_rte_get_self()->jobid, - ompi_rte_get_self()->vpid, segment, *tokens); - } - - /* protect against errors */ - if (NULL == segment) { - return MCA_GPR_NOTIFY_ID_MAX; - } - - - /* find trigger on replica - return id_tag */ - id_tag = gpr_replica_remove_trigger(OMPI_REGISTRY_SYNCHRO_MODE_NONE, action, - addr_mode, segment, tokens, 0); - - if (MCA_GPR_NOTIFY_ID_MAX != id_tag) { /* removed trigger successfully */ - - req_idtag = gpr_replica_remove_notify_request(id_tag); - return req_idtag; - } else { - return MCA_GPR_NOTIFY_ID_MAX; - } - -} - -int gpr_replica_synchro(ompi_registry_synchro_mode_t synchro_mode, - ompi_registry_mode_t addr_mode, - char *segment, char **tokens, int trigger, - ompi_registry_notify_cb_fn_t cb_func, void *user_tag) -{ - int rc; - mca_gpr_notify_id_t local_idtag; - - OMPI_THREAD_LOCK(&mca_gpr_replica_mutex); - - /* enter request on notify tracking system */ - local_idtag = gpr_replica_enter_notify_request(NULL, 0, cb_func, user_tag); - - /* process synchro request */ - rc = gpr_replica_synchro_nl(synchro_mode, addr_mode, - segment, tokens, trigger, local_idtag); - - OMPI_THREAD_UNLOCK(&mca_gpr_replica_mutex); - - gpr_replica_process_callbacks(); - return rc; -} - -int gpr_replica_synchro_nl(ompi_registry_synchro_mode_t synchro_mode, - ompi_registry_mode_t addr_mode, - char *segment, char **tokens, int trigger, - mca_gpr_notify_id_t id_tag) -{ - mca_gpr_replica_trigger_list_t *trig; - ompi_registry_notify_message_t *notify_msg; - - if (mca_gpr_replica_debug) { - ompi_output(0, "[%d,%d,%d] gpr replica: synchro entered on segment %s trigger %d", - ompi_rte_get_self()->cellid, ompi_rte_get_self()->jobid, - ompi_rte_get_self()->vpid, segment, trigger); - } - - /* protect against errors */ - if (NULL == segment || 0 > trigger) { - return OMPI_ERROR; - } - - /* construct the trigger */ - if (NULL != (trig = gpr_replica_construct_trigger(synchro_mode, 
OMPI_REGISTRY_NOTIFY_NONE, - addr_mode, segment, tokens, - trigger, id_tag))) { - - /* if synchro condition already met, construct and send message */ - if ((OMPI_REGISTRY_SYNCHRO_MODE_GT_EQUAL & synchro_mode && trig->count >= trigger) || - (OMPI_REGISTRY_SYNCHRO_MODE_LEVEL & synchro_mode && trig->count == trigger) || - (OMPI_REGISTRY_SYNCHRO_MODE_LT_EQUAL & synchro_mode && trig->count <= trigger)) { - notify_msg = gpr_replica_construct_notify_message(addr_mode, segment, trig->tokens); - notify_msg->trig_action = OMPI_REGISTRY_NOTIFY_NONE; - notify_msg->trig_synchro = trig->synch_mode; - gpr_replica_process_triggers(segment, trig, notify_msg); - } - return OMPI_SUCCESS; - } else { - return OMPI_ERROR; - } -} - -int gpr_replica_cancel_synchro(ompi_registry_synchro_mode_t synchro_mode, - ompi_registry_mode_t addr_mode, - char *segment, char **tokens, int trigger) -{ - int rc; - OMPI_THREAD_LOCK(&mca_gpr_replica_mutex); - rc = gpr_replica_cancel_synchro_nl(synchro_mode,addr_mode,segment,tokens,trigger); - OMPI_THREAD_UNLOCK(&mca_gpr_replica_mutex); - - if (MCA_GPR_NOTIFY_ID_MAX == rc) { - return OMPI_ERROR; - } - - return OMPI_SUCCESS; -} - -mca_gpr_notify_id_t gpr_replica_cancel_synchro_nl(ompi_registry_synchro_mode_t synchro_mode, - ompi_registry_mode_t addr_mode, - char *segment, char **tokens, int trigger) -{ - mca_gpr_notify_id_t id_tag, req_idtag; - - if (mca_gpr_replica_debug) { - ompi_output(0, "[%d,%d,%d] gpr replica: cancel_synchro entered: segment %s 1st token %s", - ompi_rte_get_self()->cellid, ompi_rte_get_self()->jobid, - ompi_rte_get_self()->vpid, segment, *tokens); - } - - /* protect against errors */ - if (NULL == segment || 0 > trigger) { - return OMPI_ERROR; - } - - /* find trigger on replica - return local id_tag */ - id_tag = gpr_replica_remove_trigger(synchro_mode, OMPI_REGISTRY_NOTIFY_NONE, - addr_mode, segment, tokens, trigger); - - if (MCA_GPR_NOTIFY_ID_MAX != id_tag) { /* removed trigger successfully */ - /* remove notify request - return 
requestor id_tag */ - req_idtag = gpr_replica_remove_notify_request(id_tag); - return req_idtag; - } else { - return MCA_GPR_NOTIFY_ID_MAX; - } -} - - -int gpr_replica_rte_register(char *contact_info, size_t num_procs, - ompi_registry_notify_cb_fn_t start_cb_func, void *start_user_tag, - ompi_registry_notify_cb_fn_t end_cb_func, void *end_user_tag) -{ - int ret; - mca_gpr_notify_id_t local_idtag1, local_idtag2; - ompi_buffer_t buffer; - - /* create the buffer to store the local information */ - ompi_buffer_init(&buffer, 0); - ompi_pack_string(buffer, contact_info); - ompi_pack(buffer, &ompi_process_info.pid, 1, OMPI_INT32); - ompi_pack_string(buffer, ompi_system_info.nodename); - - OMPI_THREAD_LOCK(&mca_gpr_replica_mutex); - - local_idtag1 = gpr_replica_enter_notify_request(NULL, 0, start_cb_func, start_user_tag); - - local_idtag2 = gpr_replica_enter_notify_request(NULL, 0, end_cb_func, end_user_tag); - - ret = gpr_replica_rte_register_nl(contact_info, buffer, num_procs, - local_idtag1, local_idtag2); - - OMPI_THREAD_UNLOCK(&mca_gpr_replica_mutex); - - gpr_replica_process_callbacks(); - return ret; -} - -int gpr_replica_rte_register_nl(char *contact_info, ompi_buffer_t buffer, size_t num_procs, - mca_gpr_notify_id_t start_idtag, - mca_gpr_notify_id_t end_idtag) -{ - char *segment; - char *keys[2]; - void *addr; - int rc, size; - ompi_process_name_t proc={0,0,0}; - - - /* extract process name from contact info */ - mca_oob_parse_contact_info(contact_info, &proc, NULL); - - /* setup keys and segment for this job */ - asprintf(&segment, "ompi-job-%s", ns_base_get_jobid_string(&proc)); - keys[0] = ns_base_get_proc_name_string(&proc); - keys[1] = NULL; - - if (mca_gpr_replica_debug) { - ompi_output(0, "gpr_replica_register: entered for proc %s", keys[0]); - } - - /* peek the buffer and resulting size */ - ompi_buffer_get(buffer, &addr, &size); - - /* place on registry, no overwrite - error if already there */ - rc = gpr_replica_put_nl(OMPI_REGISTRY_XAND, - segment, 
keys, addr, size); - - if (OMPI_SUCCESS != rc) { - if (mca_gpr_replica_debug) { - ompi_output(0, "gpr_replica_register: duplicate registration attempt from %s", keys[0]); - } - return rc; - } - - /* register a synchro on the segment so we get notified when everyone registers */ - rc = gpr_replica_synchro_nl( - OMPI_REGISTRY_SYNCHRO_MODE_LEVEL|OMPI_REGISTRY_SYNCHRO_MODE_ONE_SHOT, - OMPI_REGISTRY_OR, - segment, - NULL, - num_procs, - start_idtag); - - /* register a synchro on the segment so we get notified when everyone is gone */ - rc = gpr_replica_synchro_nl( - OMPI_REGISTRY_SYNCHRO_MODE_DESCENDING|OMPI_REGISTRY_SYNCHRO_MODE_ONE_SHOT, - OMPI_REGISTRY_OR, - segment, - NULL, - 0, - end_idtag); - - return rc; -} - -int gpr_replica_rte_unregister(char *proc_name_string) -{ - int ret; - - OMPI_THREAD_LOCK(&mca_gpr_replica_mutex); - ret = gpr_replica_rte_unregister_nl(proc_name_string); - OMPI_THREAD_UNLOCK(&mca_gpr_replica_mutex); - - gpr_replica_process_callbacks(); - return ret; -} - - -int gpr_replica_rte_unregister_nl(char *proc_name_string) -{ - char *segment; - char *keys[2]; - int rc; - ompi_process_name_t *proc; - - /* convert string to process name */ - proc = ns_base_convert_string_to_process_name(proc_name_string); - - /* setup keys and segment for this job */ - asprintf(&segment, "ompi-job-%s", ns_base_get_jobid_string(proc)); - keys[0] = strdup(proc_name_string); - keys[1] = NULL; - - rc = gpr_replica_delete_object_nl(OMPI_REGISTRY_XAND, segment, keys); - free(keys[0]); - free(proc); - return rc; - -} - - -ompi_list_t* gpr_replica_get(ompi_registry_mode_t addr_mode, - char *segment, char **tokens) -{ - ompi_list_t* list; - OMPI_THREAD_LOCK(&mca_gpr_replica_mutex); - list = gpr_replica_get_nl(addr_mode, segment, tokens); - OMPI_THREAD_UNLOCK(&mca_gpr_replica_mutex); - return list; -} - -ompi_list_t* gpr_replica_get_nl(ompi_registry_mode_t addr_mode, - char *segment, char **tokens) -{ - mca_gpr_replica_segment_t *seg=NULL; - ompi_list_t *answer=NULL; - 
ompi_registry_value_t *ans=NULL; - mca_gpr_replica_key_t *keys=NULL, *key2=NULL; - ompi_list_t *keylist=NULL; - mca_gpr_replica_keytable_t *keyptr=NULL; - mca_gpr_replica_core_t *reg=NULL; - int num_tokens=0; - - if (mca_gpr_replica_debug) { - ompi_output(0, "[%d,%d,%d] gpr replica: get entered", ompi_rte_get_self()->cellid, - ompi_rte_get_self()->jobid, ompi_rte_get_self()->vpid); - } - - answer = OBJ_NEW(ompi_list_t); - - /* protect against errors */ - if (NULL == segment) { - return answer; - } - - /* find the specified segment */ - seg = gpr_replica_find_seg(false, segment); - if (NULL == seg) { /* segment not found */ - return answer; - } - if (mca_gpr_replica_debug) { - ompi_output(0, "[%d,%d,%d] gpr replica-get: segment found", ompi_rte_get_self()->cellid, - ompi_rte_get_self()->jobid, ompi_rte_get_self()->vpid); - } - - if (NULL == tokens) { /* wildcard case - return everything */ - keylist = NULL; - keys = NULL; - } else { - - /* convert tokens to list of keys */ - keylist = gpr_replica_get_key_list(segment, tokens); - if (0 == (num_tokens = ompi_list_get_size(keylist))) { - return answer; - } - - keys = (mca_gpr_replica_key_t*)malloc(num_tokens*sizeof(mca_gpr_replica_key_t)); - key2 = keys; - - /* traverse the list to find undefined tokens - error if found */ - for (keyptr = (mca_gpr_replica_keytable_t*)ompi_list_get_first(keylist); - keyptr != (mca_gpr_replica_keytable_t*)ompi_list_get_end(keylist); - keyptr = (mca_gpr_replica_keytable_t*)ompi_list_get_next(keyptr)) { - if (MCA_GPR_REPLICA_KEY_MAX == keyptr->key) { /* unknown token */ - goto CLEANUP; - } - *key2 = keyptr->key; - key2++; - } - } - if (mca_gpr_replica_debug) { - ompi_output(0, "[%d,%d,%d] gpr replica-get: got keylist", ompi_rte_get_self()->cellid, - ompi_rte_get_self()->jobid, ompi_rte_get_self()->vpid); - } - - /* traverse the segment's registry, looking for matching tokens per the specified mode */ - for (reg = (mca_gpr_replica_core_t*)ompi_list_get_first(&seg->registry_entries); - reg 
!= (mca_gpr_replica_core_t*)ompi_list_get_end(&seg->registry_entries); - reg = (mca_gpr_replica_core_t*)ompi_list_get_next(reg)) { - - /* for each registry entry, check the key list */ - if (gpr_replica_check_key_list(addr_mode, num_tokens, keys, - reg->num_keys, reg->keys)) { /* found the key(s) on the list */ - ans = OBJ_NEW(ompi_registry_value_t); - ans->object_size = reg->object_size; - ans->object = (ompi_registry_object_t*)malloc(ans->object_size); - memcpy(ans->object, reg->object, ans->object_size); - ompi_list_append(answer, &ans->item); - } - } - if (mca_gpr_replica_debug) { - ompi_output(0, "[%d,%d,%d] gpr replica-get: finished search", ompi_rte_get_self()->cellid, - ompi_rte_get_self()->jobid, ompi_rte_get_self()->vpid); - } - - CLEANUP: - /* release list of keys */ - if(NULL != keylist) - OBJ_RELEASE(keylist); - - if (NULL != keys) { - free(keys); - keys = NULL; - } - - if (mca_gpr_replica_debug) { - ompi_output(0, "[%d,%d,%d] gpr replica-get: leaving", ompi_rte_get_self()->cellid, - ompi_rte_get_self()->jobid, ompi_rte_get_self()->vpid); - } - return answer; -} - diff --git a/src/mca/gpr/replica/gpr_replica.h b/src/mca/gpr/replica/gpr_replica.h index 39c28a40b8..f2d60300c2 100644 --- a/src/mca/gpr/replica/gpr_replica.h +++ b/src/mca/gpr/replica/gpr_replica.h @@ -3,6 +3,9 @@ * $HEADER$ * */ +/** @file + */ + #ifndef GPR_REPLICA_H #define GPR_REPLICA_H @@ -11,9 +14,9 @@ #include -#include "include/types.h" -#include "include/constants.h" -#include "class/ompi_list.h" +#include "threads/mutex.h" +#include "threads/condition.h" + #include "mca/gpr/base/base.h" /* @@ -32,6 +35,7 @@ typedef uint32_t mca_gpr_replica_key_t; struct mca_gpr_replica_t { ompi_list_t registry; ompi_list_t segment_dict; + ompi_list_t triggers; mca_gpr_replica_key_t lastkey; ompi_list_t freekeys; }; @@ -45,15 +49,27 @@ OBJ_CLASS_DECLARATION(mca_gpr_replica_t); struct mca_gpr_replica_callbacks_t { ompi_list_item_t item; ompi_registry_notify_cb_fn_t cb_func; + void *user_tag; 
ompi_registry_notify_message_t *message; ompi_process_name_t *requestor; - int remote_idtag; - void *user_tag; + ompi_registry_notify_id_t remote_idtag; }; typedef struct mca_gpr_replica_callbacks_t mca_gpr_replica_callbacks_t; OBJ_CLASS_DECLARATION(mca_gpr_replica_callbacks_t); +/* + * List of process names who have notification turned OFF + */ +struct mca_gpr_replica_notify_off_t { + ompi_list_item_t item; + ompi_registry_notify_id_t sub_number; + ompi_process_name_t *proc; +}; +typedef struct mca_gpr_replica_notify_off_t mca_gpr_replica_notify_off_t; + +OBJ_CLASS_DECLARATION(mca_gpr_replica_notify_off_t); + /** Dictionary of token-key pairs. * This structure is used to create a linked list of token-key pairs. All calls to * registry functions pass character string tokens for programming clarity - the ompi_keytable @@ -91,6 +107,7 @@ OBJ_CLASS_DECLARATION(mca_gpr_replica_keylist_t); */ struct mca_gpr_replica_trigger_list_t { ompi_list_item_t item; /**< Allows this item to be placed on a list */ + mca_ns_base_jobid_t owning_job; /**< Job ID of the process that registered trigger */ ompi_registry_synchro_mode_t synch_mode; /**< Synchro mode - ascending, descending, ... 
*/ ompi_registry_notify_action_t action; /**< Bit-mask of actions that trigger non-synchro notification */ ompi_registry_mode_t addr_mode; /**< Addressing mode */ @@ -100,13 +117,21 @@ struct mca_gpr_replica_trigger_list_t { uint32_t trigger; /**< Number of objects that trigger notification */ uint32_t count; /**< Number of qualifying objects currently in segment */ int8_t above_below; /**< Tracks transitions across level */ - mca_gpr_notify_id_t id_tag; /**< Tag into the list of notify structures */ + ompi_registry_notify_id_t local_idtag; /**< Tag into the list of notify structures */ }; typedef struct mca_gpr_replica_trigger_list_t mca_gpr_replica_trigger_list_t; -#define MCA_GPR_REPLICA_TRIGGER_ABOVE_LEVEL 1 -#define MCA_GPR_REPLICA_TRIGGER_BELOW_LEVEL -1 -#define MCA_GPR_REPLICA_TRIGGER_AT_LEVEL 0 +#define MCA_GPR_REPLICA_TRIGGER_ABOVE_LEVEL (int8_t) 1 +#define MCA_GPR_REPLICA_TRIGGER_BELOW_LEVEL (int8_t) -1 +#define MCA_GPR_REPLICA_TRIGGER_AT_LEVEL (int8_t) 0 + +/* define a few action flags for trigger evaluation + */ +#define MCA_GPR_REPLICA_OBJECT_ADDED (int8_t) 1 +#define MCA_GPR_REPLICA_OBJECT_DELETED (int8_t) 2 +#define MCA_GPR_REPLICA_OBJECT_UPDATED (int8_t) 3 +#define MCA_GPR_REPLICA_SUBSCRIBER_ADDED (int8_t) 4 + OBJ_CLASS_DECLARATION(mca_gpr_replica_trigger_list_t); @@ -175,10 +200,13 @@ OBJ_CLASS_DECLARATION(mca_gpr_replica_core_t); */ struct mca_gpr_replica_segment_t { ompi_list_item_t item; /**< Allows this item to be placed on a list */ - mca_gpr_replica_key_t segment; /**< Key corresponding to name of registry segment */ + char *name; /**< Name of the segment */ + mca_ns_base_jobid_t owning_job; /**< Job that "owns" this segment */ + mca_gpr_replica_key_t key; /**< Key corresponding to name of registry segment */ mca_gpr_replica_key_t lastkey; /**< Highest key value used */ ompi_list_t registry_entries; /**< Linked list of stored objects within this segment */ ompi_list_t triggers; /**< List of triggers on this segment */ + bool triggers_active; 
/**< Indicates if triggers are active or not */ ompi_list_t keytable; /**< Token-key dictionary for this segment */ ompi_list_t freekeys; /**< List of keys that have been made available */ }; @@ -187,16 +215,41 @@ typedef struct mca_gpr_replica_segment_t mca_gpr_replica_segment_t; OBJ_CLASS_DECLARATION(mca_gpr_replica_segment_t); +struct mca_gpr_replica_notify_request_tracker_t { + ompi_list_item_t item; /**< Allows this item to be placed on a list */ + ompi_process_name_t *requestor; /**< Name of requesting process */ + ompi_registry_notify_cb_fn_t callback; /**< Function to be called for notificaiton */ + void *user_tag; /**< User-provided tag for callback function */ + ompi_registry_notify_id_t local_idtag; /**< Local ID tag of associated subscription */ + ompi_registry_notify_id_t remote_idtag; /**< Remote ID tag of subscription */ + mca_gpr_replica_segment_t *segptr; /**< Pointer to segment that subscription was + placed upon */ + ompi_registry_notify_action_t action; /**< The action that triggers the request */ +}; +typedef struct mca_gpr_replica_notify_request_tracker_t mca_gpr_replica_notify_request_tracker_t; + +OMPI_DECLSPEC OBJ_CLASS_DECLARATION(mca_gpr_replica_notify_request_tracker_t); + + /* * globals needed within component */ -extern mca_gpr_replica_t mca_gpr_replica_head; -extern ompi_list_t mca_gpr_replica_notify_request_tracker; -extern ompi_list_t mca_gpr_replica_callbacks; -extern mca_gpr_notify_id_t mca_gpr_replica_last_notify_id_tag; -extern ompi_list_t mca_gpr_replica_free_notify_id_tags; -extern int mca_gpr_replica_debug; -extern ompi_mutex_t mca_gpr_replica_mutex; +extern mca_gpr_replica_t mca_gpr_replica_head; /**< Head of the entire registry */ +extern ompi_list_t mca_gpr_replica_notify_request_tracker; /**< List of requested notifications */ +extern ompi_list_t mca_gpr_replica_callbacks; /**< List of callbacks currently pending */ +extern ompi_list_t mca_gpr_replica_notify_off_list; /**< List of processes and subscriptions with notify 
turned off */ +extern ompi_registry_notify_id_t mca_gpr_replica_last_notify_id_tag; /**< Next available notify id tag */ +extern ompi_list_t mca_gpr_replica_free_notify_id_tags; /**< List of free notify id tags */ +extern int mca_gpr_replica_debug; /**< Debug flag to control debugging output */ +extern ompi_mutex_t mca_gpr_replica_mutex; /**< Thread lock for registry functions */ +extern bool mca_gpr_replica_compound_cmd_mode; /**< Indicates if we are building compound cmd */ +extern bool mca_gpr_replica_exec_compound_cmd_mode; /**< Indicates if we are executing compound cmd */ +extern ompi_buffer_t mca_gpr_replica_compound_cmd; /**< Compound cmd buffer */ +extern ompi_mutex_t mca_gpr_replica_wait_for_compound_mutex; /**< Lock to protect build compound cmd */ +extern ompi_condition_t mca_gpr_replica_compound_cmd_condition; /**< Condition variable to control thread access to build compound cmd */ +extern int mca_gpr_replica_compound_cmd_waiting; /**< Count number of threads waiting to build compound cmd */ +extern bool mca_gpr_replica_silent_mode; /**< Indicates if local silent mode active */ + /* * Module open / close @@ -212,80 +265,162 @@ mca_gpr_base_module_t *mca_gpr_replica_init(bool *allow_multi_user_threads, bool int mca_gpr_replica_finalize(void); /* - * Implemented registry functions + * Implemented registry functions - see gpr.h for documentation */ -int gpr_replica_delete_segment(char *segment); -int gpr_replica_delete_segment_nl(char *segment); +/* + * Compound cmd functions + */ +int mca_gpr_replica_begin_compound_cmd(void); -int gpr_replica_put(ompi_registry_mode_t addr_mode, char *segment, - char **tokens, ompi_registry_object_t object, - ompi_registry_object_size_t size); -int gpr_replica_put_nl(ompi_registry_mode_t addr_mode, char *segment, +int mca_gpr_replica_stop_compound_cmd(void); + +ompi_list_t* mca_gpr_replica_exec_compound_cmd(bool return_requested); + +/* + * Mode operations + */ +void mca_gpr_replica_silent_mode_on(void); + +void 
mca_gpr_replica_silent_mode_off(void); + +void mca_gpr_replica_notify_off(ompi_registry_notify_id_t sub_number); +void mca_gpr_replica_notify_off_nl(ompi_process_name_t *proc, ompi_registry_notify_id_t sub_number); + +void mca_gpr_replica_triggers_active(mca_ns_base_jobid_t jobid); +void mca_gpr_replica_triggers_active_nl(mca_ns_base_jobid_t jobid); + +void mca_gpr_replica_triggers_inactive(mca_ns_base_jobid_t jobid); +void mca_gpr_replica_triggers_inactive_nl(mca_ns_base_jobid_t jobid); + +void mca_gpr_replica_notify_on(ompi_registry_notify_id_t sub_number); +void mca_gpr_replica_notify_on_nl(ompi_process_name_t *proc, ompi_registry_notify_id_t sub_number); + +int mca_gpr_replica_assume_ownership(char *segment); +int mca_gpr_replica_assume_ownership_nl(mca_gpr_replica_segment_t *seg, + mca_ns_base_jobid_t jobid); + +/* + * Delete-index functions + */ +int mca_gpr_replica_delete_segment(char *segment); +void mca_gpr_replica_delete_segment_nl(mca_gpr_replica_segment_t *seg); + +int mca_gpr_replica_delete_object(ompi_registry_mode_t addr_mode, + char *segment, char **tokens); +int mca_gpr_replica_delete_object_nl(ompi_registry_mode_t addr_mode, + mca_gpr_replica_segment_t *seg, + mca_gpr_replica_key_t *keys, + int num_keys); + +ompi_list_t* mca_gpr_replica_index(char *segment); +ompi_list_t* mca_gpr_replica_index_nl(mca_gpr_replica_segment_t *seg); + +/* + * Cleanup functions + */ +void mca_gpr_replica_cleanup_job(mca_ns_base_jobid_t jobid); +void mca_gpr_replica_cleanup_job_nl(mca_ns_base_jobid_t jobid); + +void mca_gpr_replica_cleanup_proc(bool purge, ompi_process_name_t *proc); +void mca_gpr_replica_cleanup_proc_nl(bool purge, ompi_process_name_t *proc); + +/* + * Put-get functions + */ +int mca_gpr_replica_put(ompi_registry_mode_t addr_mode, char *segment, char **tokens, ompi_registry_object_t object, ompi_registry_object_size_t size); +int mca_gpr_replica_put_nl(ompi_registry_mode_t addr_mode, + mca_gpr_replica_segment_t *seg, + mca_gpr_replica_key_t *keys, + 
int num_keys, ompi_registry_object_t object, + ompi_registry_object_size_t size, + int8_t *action_taken); -int gpr_replica_delete_object(ompi_registry_mode_t addr_mode, - char *segment, char **tokens); -int gpr_replica_delete_object_nl(ompi_registry_mode_t addr_mode, - char *segment, char **tokens); +ompi_list_t* mca_gpr_replica_get(ompi_registry_mode_t addr_mode, + char *segment, char **tokens); +ompi_list_t* mca_gpr_replica_get_nl(ompi_registry_mode_t addr_mode, + mca_gpr_replica_segment_t *seg, + mca_gpr_replica_key_t *keys, + int num_keys); -ompi_list_t* gpr_replica_index(char *segment); -ompi_list_t* gpr_replica_index_nl(char *segment); - -int gpr_replica_subscribe(ompi_registry_mode_t addr_mode, +/* + * Subscribe functions + */ +ompi_registry_notify_id_t mca_gpr_replica_subscribe(ompi_registry_mode_t addr_mode, ompi_registry_notify_action_t action, char *segment, char **tokens, ompi_registry_notify_cb_fn_t cb_func, void *user_tag); -int gpr_replica_subscribe_nl(ompi_registry_mode_t addr_mode, - ompi_registry_notify_action_t action, - char *segment, char **tokens, mca_gpr_notify_id_t idtag); +int mca_gpr_replica_subscribe_nl(ompi_registry_mode_t addr_mode, + ompi_registry_notify_action_t action, + mca_gpr_replica_segment_t *seg, + mca_gpr_replica_key_t *keys, + int num_keys, + ompi_registry_notify_id_t id_tag); -int gpr_replica_unsubscribe(ompi_registry_mode_t addr_mode, - ompi_registry_notify_action_t action, - char *segment, char **tokens); -mca_gpr_notify_id_t gpr_replica_unsubscribe_nl(ompi_registry_mode_t addr_mode, - ompi_registry_notify_action_t action, - char *segment, char **tokens); +int mca_gpr_replica_unsubscribe(ompi_registry_notify_id_t sub_number); +ompi_registry_notify_id_t mca_gpr_replica_unsubscribe_nl(ompi_registry_notify_id_t sub_number); -int gpr_replica_synchro(ompi_registry_synchro_mode_t synchro_mode, +/* + * Synchro functions + */ +ompi_registry_notify_id_t mca_gpr_replica_synchro(ompi_registry_synchro_mode_t synchro_mode, 
ompi_registry_mode_t addr_mode, char *segment, char **tokens, int trigger, ompi_registry_notify_cb_fn_t cb_func, void *user_tag); -int gpr_replica_synchro_nl(ompi_registry_synchro_mode_t synchro_mode, - ompi_registry_mode_t addr_mode, - char *segment, char **tokens, int trigger, - mca_gpr_notify_id_t id_tag); - -int gpr_replica_cancel_synchro(ompi_registry_synchro_mode_t synchro_mode, +int mca_gpr_replica_synchro_nl(ompi_registry_synchro_mode_t synchro_mode, ompi_registry_mode_t addr_mode, - char *segment, char **tokens, int trigger); -mca_gpr_notify_id_t gpr_replica_cancel_synchro_nl(ompi_registry_synchro_mode_t synchro_mode, - ompi_registry_mode_t addr_mode, - char *segment, char **tokens, int trigger); + mca_gpr_replica_segment_t *seg, + mca_gpr_replica_key_t *keys, + int num_keys, + int trigger, + ompi_registry_notify_id_t id_tag); -ompi_list_t* gpr_replica_get(ompi_registry_mode_t addr_mode, - char *segment, char **tokens); -ompi_list_t* gpr_replica_get_nl(ompi_registry_mode_t addr_mode, - char *segment, char **tokens); +int mca_gpr_replica_cancel_synchro(ompi_registry_notify_id_t synch_number); +ompi_registry_notify_id_t mca_gpr_replica_cancel_synchro_nl(ompi_registry_notify_id_t synch_number); -ompi_list_t* gpr_replica_test_internals(int level); +/* + * Dump function + */ +void mca_gpr_replica_dump(int output_id); +void mca_gpr_replica_dump_nl(ompi_buffer_t buffer); + +/* + * Messaging functions + */ +void mca_gpr_replica_deliver_notify_msg(ompi_registry_notify_action_t state, + ompi_registry_notify_message_t *message); + + +/* + * Test internals + */ +ompi_list_t* mca_gpr_replica_test_internals(int level); + +/* + * Startup/shutdown functions + */ +ompi_buffer_t mca_gpr_replica_get_startup_msg(mca_ns_base_jobid_t jobid, + ompi_list_t *recipients); + +ompi_buffer_t mca_gpr_replica_get_shutdown_msg(mca_ns_base_jobid_t jobid, + ompi_list_t *recipients); + +ompi_buffer_t +mca_gpr_replica_construct_startup_shutdown_msg_nl(int mode, + mca_ns_base_jobid_t jobid, + 
ompi_list_t *recipients); + +/* + * Functions that interface to the proxy, but aren't available outside the gpr subsystem + */ void mca_gpr_replica_recv(int status, ompi_process_name_t* sender, ompi_buffer_t buffer, int tag, void* cbdata); -void gpr_replica_remote_notify(ompi_process_name_t *recipient, int recipient_tag, +void mca_gpr_replica_remote_notify(ompi_process_name_t *recipient, int recipient_tag, ompi_registry_notify_message_t *message); -int gpr_replica_rte_register(char *contact_info, size_t num_procs, - ompi_registry_notify_cb_fn_t start_cb_func, void *start_user_tag, - ompi_registry_notify_cb_fn_t end_cb_func, void *end_user_tag); - -int gpr_replica_rte_register_nl(char *contact_info, ompi_buffer_t buffer, size_t num_procs, - mca_gpr_notify_id_t start_tag, mca_gpr_notify_id_t end_tag); - -int gpr_replica_rte_unregister(char *proc_name_string); - -int gpr_replica_rte_unregister_nl(char *proc_name_string); #endif diff --git a/src/mca/gpr/replica/gpr_replica_cleanup.c b/src/mca/gpr/replica/gpr_replica_cleanup.c new file mode 100644 index 0000000000..73c1c75ebe --- /dev/null +++ b/src/mca/gpr/replica/gpr_replica_cleanup.c @@ -0,0 +1,111 @@ +/* + * $HEADER$ + */ +/** @file: + * + * The Open MPI general purpose registry - implementation. 
+ *
+ */
+
+/*
+ * includes
+ */
+
+#include "ompi_config.h"
+
+#include "gpr_replica.h"
+#include "gpr_replica_internals.h"
+
+/* Locked wrapper: holds mca_gpr_replica_mutex around mca_gpr_replica_cleanup_job_nl(jobid). */
+void mca_gpr_replica_cleanup_job(mca_ns_base_jobid_t jobid)
+{
+    OMPI_THREAD_LOCK(&mca_gpr_replica_mutex);
+    mca_gpr_replica_cleanup_job_nl(jobid);
+    OMPI_THREAD_UNLOCK(&mca_gpr_replica_mutex);
+}
+
+/* Unlocked worker: delete segments owned by jobid; on other segments remove triggers whose owning_job is jobid. */
+void mca_gpr_replica_cleanup_job_nl(mca_ns_base_jobid_t jobid)
+{
+    mca_gpr_replica_segment_t *seg, *next_seg;
+    mca_gpr_replica_trigger_list_t *trig, *next_trig;
+
+    /* traverse the registry */
+    for (seg = (mca_gpr_replica_segment_t*)ompi_list_get_first(&mca_gpr_replica_head.registry);
+         seg != (mca_gpr_replica_segment_t*)ompi_list_get_end(&mca_gpr_replica_head.registry);) {
+
+        next_seg = (mca_gpr_replica_segment_t*)ompi_list_get_next(seg); /* saved first: seg may be deleted below */
+
+        if (jobid == seg->owning_job) { /* this is a segment associated with this jobid - remove it */
+
+            mca_gpr_replica_delete_segment_nl(seg);
+
+        } else { /* check this seg subscriptions/synchros with recipients from this jobid */
+            for (trig = (mca_gpr_replica_trigger_list_t*)ompi_list_get_first(&seg->triggers);
+                 trig != (mca_gpr_replica_trigger_list_t*)ompi_list_get_end(&seg->triggers);) {
+
+                next_trig = (mca_gpr_replica_trigger_list_t*)ompi_list_get_next(trig); /* saved first: trig may be removed below */
+
+                if (trig->owning_job == jobid) {
+                    mca_gpr_replica_remove_trigger(trig->local_idtag);
+                }
+                trig = next_trig;
+            }
+        }
+        seg = next_seg;
+    }
+}
+
+/* Locked wrapper: holds mca_gpr_replica_mutex around mca_gpr_replica_cleanup_proc_nl(purge, proc). */
+void mca_gpr_replica_cleanup_proc(bool purge, ompi_process_name_t *proc)
+{
+    OMPI_THREAD_LOCK(&mca_gpr_replica_mutex);
+    mca_gpr_replica_cleanup_proc_nl(purge, proc); /* act on the caller's proc, not on ompi_rte_get_self() */
+    OMPI_THREAD_UNLOCK(&mca_gpr_replica_mutex);
+}
+
+/* Unlocked worker: decrement startup/shutdown synchro counts on proc's job segments; if purge, drop proc's key and subscriptions. */
+void mca_gpr_replica_cleanup_proc_nl(bool purge, ompi_process_name_t *proc)
+{
+    mca_gpr_replica_segment_t *seg;
+    mca_gpr_replica_trigger_list_t *trig;
+    char *procname;
+    mca_ns_base_jobid_t jobid;
+
+    procname = ompi_name_server.get_proc_name_string(proc); /* NOTE(review): never released here - confirm who owns the returned string */
+    jobid = ompi_name_server.get_jobid(proc);
+
+    /* search all segments for this process name - remove all references
+     */
+    for (seg = (mca_gpr_replica_segment_t*)ompi_list_get_first(&mca_gpr_replica_head.registry);
+         seg != (mca_gpr_replica_segment_t*)ompi_list_get_end(&mca_gpr_replica_head.registry);
+         seg = (mca_gpr_replica_segment_t*)ompi_list_get_next(seg)) {
+
+        if (jobid == seg->owning_job) {
+            /* adjust any startup synchro and/or shutdown synchros owned
+             * by the associated jobid by one.
+             */
+            for (trig = (mca_gpr_replica_trigger_list_t*)ompi_list_get_first(&seg->triggers);
+                 trig != (mca_gpr_replica_trigger_list_t*)ompi_list_get_end(&seg->triggers);
+                 trig = (mca_gpr_replica_trigger_list_t*)ompi_list_get_next(trig)) {
+                if ((OMPI_REGISTRY_SYNCHRO_MODE_STARTUP & trig->synch_mode) ||
+                    (OMPI_REGISTRY_SYNCHRO_MODE_SHUTDOWN & trig->synch_mode)) {
+                    trig->count--; /* NOTE(review): count is uint32_t, so this wraps if it is already 0 - confirm it cannot be */
+                }
+            }
+            mca_gpr_replica_check_synchros(seg);
+        }
+
+        if (purge) {
+            /* remove name from the dictionary and set all associated object keys to invalid */
+            mca_gpr_replica_delete_key(seg, procname);
+        }
+    }
+
+    if (purge) {
+        /* purge all subscriptions with this process as recipient */
+        mca_gpr_replica_purge_subscriptions(proc);
+    }
+
+    return;
+}
diff --git a/src/mca/gpr/replica/gpr_replica_component.c b/src/mca/gpr/replica/gpr_replica_component.c index 18d2b75feb..540f8f8268 100644 --- a/src/mca/gpr/replica/gpr_replica_component.c +++ b/src/mca/gpr/replica/gpr_replica_component.c @@ -13,20 +13,6 @@ */ #include "ompi_config.h" -#include - -#include "include/constants.h" - -#include "threads/mutex.h" - -#include "util/proc_info.h" -#include "util/output.h" -#include "util/bufpack.h" - -#include "mca/mca.h" -#include "mca/base/mca_base_param.h" -#include "mca/oob/base/base.h" -#include "mca/gpr/base/base.h" #include "gpr_replica.h" #include "gpr_replica_internals.h" @@ -56,18 +42,32 @@ OMPI_COMP_EXPORT mca_gpr_base_component_t mca_gpr_replica_component = { * setup the function pointers for the module */ static mca_gpr_base_module_t mca_gpr_replica = { - gpr_replica_get, - gpr_replica_put, - 
gpr_replica_delete_segment, - gpr_replica_subscribe, - gpr_replica_unsubscribe, - gpr_replica_synchro, - gpr_replica_cancel_synchro, - gpr_replica_delete_object, - gpr_replica_index, - gpr_replica_test_internals, - gpr_replica_rte_register, - gpr_replica_rte_unregister + mca_gpr_replica_get, + mca_gpr_replica_put, + mca_gpr_replica_delete_segment, + mca_gpr_replica_subscribe, + mca_gpr_replica_unsubscribe, + mca_gpr_replica_synchro, + mca_gpr_replica_cancel_synchro, + mca_gpr_replica_delete_object, + mca_gpr_replica_index, + mca_gpr_replica_test_internals, + mca_gpr_replica_begin_compound_cmd, + mca_gpr_replica_stop_compound_cmd, + mca_gpr_replica_exec_compound_cmd, + mca_gpr_replica_dump, + mca_gpr_replica_silent_mode_on, + mca_gpr_replica_silent_mode_off, + mca_gpr_replica_notify_off, + mca_gpr_replica_notify_on, + mca_gpr_replica_assume_ownership, + mca_gpr_replica_triggers_active, + mca_gpr_replica_triggers_inactive, + mca_gpr_replica_get_startup_msg, + mca_gpr_replica_get_shutdown_msg, + mca_gpr_replica_cleanup_job, + mca_gpr_replica_cleanup_proc, + mca_gpr_replica_deliver_notify_msg }; /* @@ -82,10 +82,41 @@ static bool initialized = false; mca_gpr_replica_t mca_gpr_replica_head; ompi_list_t mca_gpr_replica_notify_request_tracker; ompi_list_t mca_gpr_replica_callbacks; -mca_gpr_notify_id_t mca_gpr_replica_last_notify_id_tag; +ompi_list_t mca_gpr_replica_notify_off_list; +ompi_registry_notify_id_t mca_gpr_replica_last_notify_id_tag; ompi_list_t mca_gpr_replica_free_notify_id_tags; int mca_gpr_replica_debug; ompi_mutex_t mca_gpr_replica_mutex; +bool mca_gpr_replica_compound_cmd_mode; +bool mca_gpr_replica_exec_compound_cmd_mode; +ompi_buffer_t mca_gpr_replica_compound_cmd; +ompi_mutex_t mca_gpr_replica_wait_for_compound_mutex; +ompi_condition_t mca_gpr_replica_compound_cmd_condition; +int mca_gpr_replica_compound_cmd_waiting; +bool mca_gpr_replica_silent_mode; + + +/* constructor - used to initialize state of notify_off instance */ +static void 
mca_gpr_replica_notify_off_construct(mca_gpr_replica_notify_off_t* off) +{ + off->sub_number = OMPI_REGISTRY_NOTIFY_ID_MAX; + off->proc = NULL; +} + +/* destructor - used to free any resources held by notify_off instance */ +static void mca_gpr_replica_notify_off_destructor(mca_gpr_replica_notify_off_t* off) +{ + if (NULL != off->proc) { + free(off->proc); + } +} + +/* define instance of notify_off class */ +OBJ_CLASS_INSTANCE( + mca_gpr_replica_notify_off_t, + ompi_list_item_t, + mca_gpr_replica_notify_off_construct, + mca_gpr_replica_notify_off_destructor); /* constructor - used to initialize state of keytable instance */ @@ -166,21 +197,21 @@ static void mca_gpr_replica_trigger_list_construct(mca_gpr_replica_trigger_list_ trig->tokens = NULL; trig->trigger = 0; trig->count = 0; - trig->id_tag = MCA_GPR_NOTIFY_ID_MAX; + trig->local_idtag = OMPI_REGISTRY_NOTIFY_ID_MAX; } /* destructor - used to free any resources held by instance */ static void mca_gpr_replica_trigger_list_destructor(mca_gpr_replica_trigger_list_t* trig) { char **tok; - int i; + uint i; if (NULL != trig->keys) { free(trig->keys); trig->keys = NULL; } if (NULL != trig->tokens) { - for (i=0, tok=trig->tokens; i< trig->num_keys; i++) { + for (i=0, tok=trig->tokens; i< (uint)trig->num_keys; i++) { free(*tok); *tok = NULL; tok++; @@ -273,8 +304,11 @@ OBJ_CLASS_INSTANCE( /* constructor - used to initialize state of segment instance */ static void mca_gpr_replica_segment_construct(mca_gpr_replica_segment_t* seg) { - seg->segment = 0; + seg->name = NULL; + seg->owning_job = MCA_NS_BASE_JOBID_MAX; + seg->key = MCA_GPR_REPLICA_KEY_MAX; seg->lastkey = 0; + seg->triggers_active = false; OBJ_CONSTRUCT(&seg->registry_entries, ompi_list_t); OBJ_CONSTRUCT(&seg->triggers, ompi_list_t); OBJ_CONSTRUCT(&seg->keytable, ompi_list_t); @@ -293,6 +327,10 @@ static void mca_gpr_replica_segment_destructor(mca_gpr_replica_segment_t* seg) ompi_output(0, "entered segment destructor"); } + if (NULL != seg->name) { + 
free(seg->name); + } + while (NULL != (reg = (mca_gpr_replica_core_t*)ompi_list_remove_first(&seg->registry_entries))) { OBJ_RELEASE(reg); } @@ -314,7 +352,7 @@ static void mca_gpr_replica_segment_destructor(mca_gpr_replica_segment_t* seg) OBJ_DESTRUCT(&seg->freekeys); } -/* define instance of ompi_class_t */ +/* define instance of mca_gpr_replica_segment_t */ OBJ_CLASS_INSTANCE( mca_gpr_replica_segment_t, /* type name */ ompi_list_item_t, /* parent "class" name */ @@ -322,10 +360,35 @@ OBJ_CLASS_INSTANCE( mca_gpr_replica_segment_destructor); /* destructor */ -/* - * don't really need this function - could just put NULL in the above structure - * Just holding the place in case we decide there is something we need to do - */ +/* constructor - used to initialize notify message instance */ +static void mca_gpr_replica_notify_request_tracker_construct(mca_gpr_replica_notify_request_tracker_t* req) +{ + req->requestor = NULL; + req->callback = NULL; + req->user_tag = NULL; + req->local_idtag = OMPI_REGISTRY_NOTIFY_ID_MAX; + req->remote_idtag = OMPI_REGISTRY_NOTIFY_ID_MAX; + req->segptr = NULL; + req->action = OMPI_REGISTRY_NOTIFY_NONE; +} + +/* destructor - used to free any resources held by instance */ +static void mca_gpr_replica_notify_request_tracker_destructor(mca_gpr_replica_notify_request_tracker_t* req) +{ + if (NULL != req->requestor) { + free(req->requestor); + } +} + +/* define instance of ompi_class_t */ +OBJ_CLASS_INSTANCE( + mca_gpr_replica_notify_request_tracker_t, /* type name */ + ompi_list_item_t, /* parent "class" name */ + mca_gpr_replica_notify_request_tracker_construct, /* constructor */ + mca_gpr_replica_notify_request_tracker_destructor); /* destructor */ + + + int mca_gpr_replica_open(void) { int id; @@ -347,7 +410,6 @@ int mca_gpr_replica_close(void) mca_gpr_base_module_t *mca_gpr_replica_init(bool *allow_multi_user_threads, bool *have_hidden_threads, int *priority) { - /* ompi_output(0, "entered replica init"); */ /* If we are to host a 
replica, then we want to be selected, so do all the setup and return the module */ @@ -366,25 +428,26 @@ mca_gpr_base_module_t *mca_gpr_replica_init(bool *allow_multi_user_threads, bool *allow_multi_user_threads = true; *have_hidden_threads = false; - /* setup the thread lock */ + /* setup the thread locks and condition variables */ OBJ_CONSTRUCT(&mca_gpr_replica_mutex, ompi_mutex_t); + OBJ_CONSTRUCT(&mca_gpr_replica_wait_for_compound_mutex, ompi_mutex_t); + OBJ_CONSTRUCT(&mca_gpr_replica_compound_cmd_condition, ompi_condition_t); + + /* initialize the registry compound mode */ + mca_gpr_replica_compound_cmd_mode = false; + mca_gpr_replica_exec_compound_cmd_mode = false; + mca_gpr_replica_compound_cmd_waiting = 0; /* initialize the registry head */ +/* OBJ_CONSTRUCT(&mca_gpr_replica_head.registry, ompi_object_t); */ OBJ_CONSTRUCT(&mca_gpr_replica_head.registry, ompi_list_t); - - if (mca_gpr_replica_debug) { - ompi_output(0, "registry head setup"); - } + OBJ_CONSTRUCT(&mca_gpr_replica_head.triggers, ompi_list_t); /* initialize the global dictionary for segment id's */ OBJ_CONSTRUCT(&mca_gpr_replica_head.segment_dict, ompi_list_t); OBJ_CONSTRUCT(&mca_gpr_replica_head.freekeys, ompi_list_t); mca_gpr_replica_head.lastkey = 0; - if (mca_gpr_replica_debug) { - ompi_output(0, "global dict setup"); - } - /* initialize the notify request tracker */ OBJ_CONSTRUCT(&mca_gpr_replica_notify_request_tracker, ompi_list_t); mca_gpr_replica_last_notify_id_tag = 0; @@ -397,6 +460,12 @@ mca_gpr_base_module_t *mca_gpr_replica_init(bool *allow_multi_user_threads, bool /* initialize the callback list head */ OBJ_CONSTRUCT(&mca_gpr_replica_callbacks, ompi_list_t); + /* initialize the mode trackers */ + OBJ_CONSTRUCT(&mca_gpr_replica_notify_off_list, ompi_list_t); + + /* initialize any local variables */ + mca_gpr_replica_silent_mode = false; + /* issue the non-blocking receive */ rc = mca_oob_recv_packed_nb(MCA_OOB_NAME_ANY, MCA_OOB_TAG_GPR, 0, mca_gpr_replica_recv, NULL); if(rc != 
OMPI_SUCCESS && rc != OMPI_ERR_NOT_IMPLEMENTED) { @@ -469,779 +538,3 @@ int mca_gpr_replica_finalize(void) mca_oob_recv_cancel(MCA_OOB_NAME_ANY, MCA_OOB_TAG_GPR); return OMPI_SUCCESS; } - -/* - * handle message from proxies - */ - -void mca_gpr_replica_recv(int status, ompi_process_name_t* sender, - ompi_buffer_t buffer, int tag, - void* cbdata) -{ - ompi_buffer_t answer, error_answer, reg_buffer; - ompi_registry_object_t *object; - ompi_registry_object_size_t object_size; - ompi_registry_mode_t mode; - ompi_registry_notify_action_t action; - ompi_registry_value_t *regval; - ompi_list_t *returned_list; - ompi_registry_internal_test_results_t *testval; - ompi_registry_index_value_t *indexval; - char **tokens, **tokptr; - int32_t num_tokens, test_level, i, trigger, id_tag, num_procs; - mca_gpr_notify_id_t local_idtag1, local_idtag2, start_idtag, end_idtag; - pid_t pid; - mca_gpr_cmd_flag_t command; - char *segment, *contact_info, *nodename, *proc_name; - int32_t response, synchro_mode; - - if (mca_gpr_replica_debug) { - ompi_output(0, "gpr replica: received message"); - } - - if (OMPI_SUCCESS != ompi_buffer_init(&answer, 0)) { - /* RHC -- not sure what to do if this fails */ - } - - if (OMPI_SUCCESS != ompi_unpack(buffer, &command, 1, MCA_GPR_OOB_PACK_CMD)) { - goto RETURN_ERROR; - } - - /****** DELETE SEGMENT *****/ - if (MCA_GPR_DELETE_SEGMENT_CMD == command) { /* got command to delete a segment */ - - if (mca_gpr_replica_debug) { - ompi_output(0, "\tdelete segment cmd"); - } - - if (0 > ompi_unpack_string(buffer, &segment)) { - goto RETURN_ERROR; - } - - response = (int32_t)gpr_replica_delete_segment(segment); - - if (OMPI_SUCCESS != ompi_pack(answer, &command, 1, MCA_GPR_OOB_PACK_CMD)) { - goto RETURN_ERROR; - } - if (OMPI_SUCCESS != ompi_pack(answer, &response, 1, OMPI_INT32)) { - goto RETURN_ERROR; - } - if (0 > mca_oob_send_packed(sender, answer, tag, 0)) { - /* RHC -- not sure what to do if the return send fails */ - } - - /***** PUT *****/ - } else if 
(MCA_GPR_PUT_CMD == command) { /* got command to put object on registry */ - - if (mca_gpr_replica_debug) { - ompi_output(0, "\tput cmd"); - } - - if (OMPI_SUCCESS != ompi_unpack(buffer, &mode, 1, MCA_GPR_OOB_PACK_MODE)) { - goto RETURN_ERROR; - } - - if (0 > ompi_unpack_string(buffer, &segment)) { - goto RETURN_ERROR; - } - - if (OMPI_SUCCESS != ompi_unpack(buffer, &num_tokens, 1, OMPI_INT32)) { - goto RETURN_ERROR; - } - - if (0 >= num_tokens) { /** no tokens provided - error for PUT */ - goto RETURN_ERROR; - } - - tokens = (char**)malloc((num_tokens+1)*sizeof(char*)); - - tokptr = tokens; - for (i=0; i ompi_unpack_string(buffer, tokptr)) { - goto RETURN_ERROR; - } - tokptr++; - } - *tokptr = NULL; - - if (OMPI_SUCCESS != ompi_unpack(buffer, &object_size, 1, MCA_GPR_OOB_PACK_OBJECT_SIZE)) { - goto RETURN_ERROR; - } - - if (0 >= object_size) { /* error condition - nothing to store */ - goto RETURN_ERROR; - } - - object = (ompi_registry_object_t *)malloc(object_size); - if (OMPI_SUCCESS != ompi_unpack(buffer, object, object_size, OMPI_BYTE)) { - goto RETURN_ERROR; - } - - response = (int32_t)gpr_replica_put(mode, segment, tokens, object, object_size); - - if (OMPI_SUCCESS != ompi_pack(answer, &command, 1, MCA_GPR_OOB_PACK_CMD)) { - goto RETURN_ERROR; - } - - if (OMPI_SUCCESS != ompi_pack(answer, &response, 1, OMPI_INT32)) { - goto RETURN_ERROR; - } - if (0 > mca_oob_send_packed(sender, answer, tag, 0)) { - /* RHC -- not sure what to do if the return send fails */ - } - - /***** GET *****/ - } else if (MCA_GPR_GET_CMD == command) { /* got command to put object on registry */ - - if (mca_gpr_replica_debug) { - ompi_output(0, "\tget cmd"); - } - - if (OMPI_SUCCESS != ompi_unpack(buffer, &mode, 1, MCA_GPR_OOB_PACK_MODE)) { - goto RETURN_ERROR; - } - - if (0 > ompi_unpack_string(buffer, &segment)) { - goto RETURN_ERROR; - } - - if (OMPI_SUCCESS != ompi_unpack(buffer, &num_tokens, 1, OMPI_INT32)) { - goto RETURN_ERROR; - } - - if (0 >= num_tokens) { /* no tokens provided 
- wildcard case */ - tokens = NULL; - } else { /* tokens provided */ - tokens = (char**)malloc((num_tokens+1)*sizeof(char*)); - tokptr = tokens; - for (i=0; i ompi_unpack_string(buffer, tokptr)) { - goto RETURN_ERROR; - } - tokptr++; - } - *tokptr = NULL; - } - - returned_list = gpr_replica_get(mode, segment, tokens); - - if (OMPI_SUCCESS != ompi_pack(answer, (void*)&command, 1, MCA_GPR_OOB_PACK_CMD)) { - goto RETURN_ERROR; - } - - response = (int32_t)ompi_list_get_size(returned_list); - if (OMPI_SUCCESS != ompi_pack(answer, (void*)&response, 1, OMPI_INT32)) { - goto RETURN_ERROR; - } - - if (0 < response) { /* don't send anything else back if the list is empty */ - for (regval = (ompi_registry_value_t*)ompi_list_get_first(returned_list); - regval != (ompi_registry_value_t*)ompi_list_get_end(returned_list); - regval = (ompi_registry_value_t*)ompi_list_get_next(regval)) { /* traverse the list */ - if (OMPI_SUCCESS != ompi_pack(answer, ®val->object_size, 1, MCA_GPR_OOB_PACK_OBJECT_SIZE)) { - goto RETURN_ERROR; - } - if (OMPI_SUCCESS != ompi_pack(answer, regval->object, regval->object_size, OMPI_BYTE)) { - goto RETURN_ERROR; - } - } - } - if (0 > mca_oob_send_packed(sender, answer, tag, 0)) { - /* RHC -- not sure what to do if the return send fails */ - } - - /***** DELETE OBJECT *****/ - } else if (MCA_GPR_DELETE_OBJECT_CMD == command) { - - if (mca_gpr_replica_debug) { - ompi_output(0, "\tdelete object cmd"); - } - - if (OMPI_SUCCESS != ompi_unpack(buffer, &mode, 1, MCA_GPR_OOB_PACK_MODE)) { - goto RETURN_ERROR; - } - - if (0 > ompi_unpack_string(buffer, &segment)) { - goto RETURN_ERROR; - } - - if (OMPI_SUCCESS != ompi_unpack(buffer, &num_tokens, 1, OMPI_INT32)) { - goto RETURN_ERROR; - } - - if (0 >= num_tokens) { /* no tokens provided - wildcard case */ - tokens = NULL; - } else { /* tokens provided */ - tokens = (char**)malloc((num_tokens+1)*sizeof(char*)); - tokptr = tokens; - for (i=0; i ompi_unpack_string(buffer, tokptr)) { - goto RETURN_ERROR; - } - 
tokptr++; - } - *tokptr = NULL; - } - - response = (int32_t)gpr_replica_delete_object(mode, segment, tokens); - - if (OMPI_SUCCESS != ompi_pack(answer, &command, 1, MCA_GPR_OOB_PACK_CMD)) { - goto RETURN_ERROR; - } - - if (OMPI_SUCCESS != ompi_pack(answer, &response, 1, OMPI_INT32)) { - goto RETURN_ERROR; - } - - if (0 > mca_oob_send_packed(sender, answer, tag, 0)) { - /* RHC -- not sure what to do if the return send fails */ - } - - /***** INDEX *****/ - } else if (MCA_GPR_INDEX_CMD == command) { - - if (mca_gpr_replica_debug) { - ompi_output(0, "\tindex cmd"); - } - - if (OMPI_SUCCESS != ompi_unpack(buffer, &mode, 1, MCA_GPR_OOB_PACK_MODE)) { - goto RETURN_ERROR; - } - - if (0 == mode) { /* only want dict of segments */ - segment = NULL; - } else { - if (0 > ompi_unpack_string(buffer, &segment)) { - goto RETURN_ERROR; - } - } - - returned_list = gpr_replica_index(segment); - - if (OMPI_SUCCESS != ompi_pack(answer, &command, 1, MCA_GPR_OOB_PACK_CMD)) { - goto RETURN_ERROR; - } - - response = (int32_t)ompi_list_get_size(returned_list); - if (OMPI_SUCCESS != ompi_pack(answer, &response, 1, OMPI_INT32)) { - goto RETURN_ERROR; - } - - if (0 < response) { /* don't send anything else back if the list is empty */ - for (indexval = (ompi_registry_index_value_t*)ompi_list_get_first(returned_list); - indexval != (ompi_registry_index_value_t*)ompi_list_get_end(returned_list); - indexval = (ompi_registry_index_value_t*)ompi_list_get_next(indexval)) { /* traverse the list */ - if (OMPI_SUCCESS != ompi_pack_string(answer, indexval->token)) { - goto RETURN_ERROR; - } - } - } - - if (0 > mca_oob_send_packed(sender, answer, tag, 0)) { - /* RHC -- not sure what to do if the return send fails */ - } - - /***** SUBSCRIBE *****/ - } else if (MCA_GPR_SUBSCRIBE_CMD == command) { - - if (mca_gpr_replica_debug) { - ompi_output(0, "\tsubscribe cmd"); - } - - if (OMPI_SUCCESS != ompi_unpack(buffer, &mode, 1, MCA_GPR_OOB_PACK_MODE)) { - goto RETURN_ERROR; - } - - if (OMPI_SUCCESS != 
ompi_unpack(buffer, &action, 1, MCA_GPR_OOB_PACK_ACTION)) { - goto RETURN_ERROR; - } - - if (0 > ompi_unpack_string(buffer, &segment)) { - goto RETURN_ERROR; - } - - if (OMPI_SUCCESS != ompi_unpack(buffer, &num_tokens, 1, OMPI_INT32)) { - goto RETURN_ERROR; - } - - if (0 < num_tokens) { /* tokens provided */ - tokens = (char**)malloc((num_tokens+1)*sizeof(char*)); - tokptr = tokens; - for (i=0; i ompi_unpack_string(buffer, tokptr)) { - goto RETURN_ERROR; - } - tokptr++; - } - *tokptr = NULL; - } else { /* no tokens provided - wildcard case */ - tokens = NULL; - } - - if (OMPI_SUCCESS != ompi_unpack(buffer, &id_tag, 1, OMPI_INT32)) { - goto RETURN_ERROR; - } - - /******* LOCK *****/ - OMPI_THREAD_LOCK(&mca_gpr_replica_mutex); - - /* enter request on notify tracking system */ - local_idtag1 = gpr_replica_enter_notify_request(sender, id_tag, NULL, NULL); - - response = (int32_t)gpr_replica_subscribe_nl(mode, action, segment, tokens, - local_idtag1); - - OMPI_THREAD_UNLOCK(&mca_gpr_replica_mutex); - /****** UNLOCK ******/ - - if (OMPI_SUCCESS != ompi_pack(answer, &command, 1, MCA_GPR_OOB_PACK_CMD)) { - goto RETURN_ERROR; - } - - if (OMPI_SUCCESS != ompi_pack(answer, &response, 1, OMPI_INT32)) { - goto RETURN_ERROR; - } - - if (0 > mca_oob_send_packed(sender, answer, tag, 0)) { - /* RHC -- not sure what to do if the return send fails */ - } - - /* process any resulting callbacks */ - gpr_replica_process_callbacks(); - - /***** UNSUBSCRIBE *****/ - } else if (MCA_GPR_UNSUBSCRIBE_CMD == command) { - - if (mca_gpr_replica_debug) { - ompi_output(0, "\tunsubscribe cmd"); - } - - if (OMPI_SUCCESS != ompi_unpack(buffer, &mode, 1, MCA_GPR_OOB_PACK_MODE)) { - goto RETURN_ERROR; - } - - if (OMPI_SUCCESS != ompi_unpack(buffer, &action, 1, MCA_GPR_OOB_PACK_ACTION)) { - goto RETURN_ERROR; - } - - if (0 > ompi_unpack_string(buffer, &segment)) { - goto RETURN_ERROR; - } - - if (OMPI_SUCCESS != ompi_unpack(buffer, &num_tokens, 1, OMPI_INT32)) { - goto RETURN_ERROR; - } - - if (0 < 
num_tokens) { /* tokens provided */ - tokens = (char**)malloc((num_tokens+1)*sizeof(char*)); - tokptr = tokens; - for (i=0; i ompi_unpack_string(buffer, tokptr)) { - goto RETURN_ERROR; - } - tokptr++; - } - *tokptr = NULL; - } else { /* no tokens provided - wildcard case */ - tokens = NULL; - } - - /******* LOCK *****/ - OMPI_THREAD_LOCK(&mca_gpr_replica_mutex); - - response = (int32_t)gpr_replica_unsubscribe_nl(mode, action, segment, tokens); - - OMPI_THREAD_UNLOCK(&mca_gpr_replica_mutex); - /****** UNLOCK ******/ - - if (OMPI_SUCCESS != ompi_pack(answer, &command, 1, MCA_GPR_OOB_PACK_CMD)) { - goto RETURN_ERROR; - } - - if (OMPI_SUCCESS != ompi_pack(answer, &response, 1, OMPI_INT32)) { - goto RETURN_ERROR; - } - - if (0 > mca_oob_send_packed(sender, answer, tag, 0)) { - /* RHC -- not sure what to do if the return send fails */ - } - - - /***** SYNCHRO *****/ - } else if (MCA_GPR_SYNCHRO_CMD == command) { - - if (mca_gpr_replica_debug) { - ompi_output(0, "\tsynchro cmd"); - } - - if (OMPI_SUCCESS != ompi_unpack(buffer, &synchro_mode, 1, OMPI_INT32)) { - goto RETURN_ERROR; - } - - if (OMPI_REGISTRY_SYNCHRO_MODE_NONE == synchro_mode) { - goto RETURN_ERROR; - } - - if (OMPI_SUCCESS != ompi_unpack(buffer, &mode, 1, MCA_GPR_OOB_PACK_MODE)) { - goto RETURN_ERROR; - } - - if (0 > ompi_unpack_string(buffer, &segment)) { - goto RETURN_ERROR; - } - - if (OMPI_SUCCESS != ompi_unpack(buffer, &num_tokens, 1, OMPI_INT32)) { - goto RETURN_ERROR; - } - - if (0 < num_tokens) { /* tokens provided */ - tokens = (char**)malloc((num_tokens+1)*sizeof(char*)); - tokptr = tokens; - for (i=0; i ompi_unpack_string(buffer, tokptr)) { - goto RETURN_ERROR; - } - tokptr++; - } - *tokptr = NULL; - } else { /* no tokens provided - wildcard case, just count entries on segment */ - tokens = NULL; - } - - if (OMPI_SUCCESS != ompi_unpack(buffer, &trigger, 1, OMPI_INT32)) { - goto RETURN_ERROR; - } - - if (OMPI_SUCCESS != ompi_unpack(buffer, &id_tag, 1, OMPI_INT32)) { - goto RETURN_ERROR; - } - - - 
/******* LOCK *****/ - OMPI_THREAD_LOCK(&mca_gpr_replica_mutex); - - /* enter request on notify tracking system */ - local_idtag1 = gpr_replica_enter_notify_request(sender, id_tag, NULL, NULL); - - response = (int32_t)gpr_replica_synchro_nl(synchro_mode, - mode, segment, tokens, - trigger, local_idtag1); - - OMPI_THREAD_UNLOCK(&mca_gpr_replica_mutex); - /****** UNLOCK ******/ - - if (OMPI_SUCCESS != ompi_pack(answer, &command, 1, MCA_GPR_OOB_PACK_CMD)) { - goto RETURN_ERROR; - } - - if (OMPI_SUCCESS != ompi_pack(answer, &response, 1, OMPI_INT32)) { - goto RETURN_ERROR; - } - - if (0 > mca_oob_send_packed(sender, answer, tag, 0)) { - /* RHC -- not sure what to do if the return send fails */ - } - - /* process any resulting callbacks */ - gpr_replica_process_callbacks(); - - - /***** CANCEL SYNCHRO *****/ - } else if (MCA_GPR_CANCEL_SYNCHRO_CMD == command) { - - if (mca_gpr_replica_debug) { - ompi_output(0, "\tcancel synchro cmd"); - } - - if (OMPI_SUCCESS != ompi_unpack(buffer, &synchro_mode, 1, OMPI_INT32)) { - goto RETURN_ERROR; - } - - if (OMPI_REGISTRY_SYNCHRO_MODE_NONE == synchro_mode) { - goto RETURN_ERROR; - } - - if (OMPI_SUCCESS != ompi_unpack(buffer, &mode, 1, MCA_GPR_OOB_PACK_MODE)) { - goto RETURN_ERROR; - } - - if (0 > ompi_unpack_string(buffer, &segment)) { - goto RETURN_ERROR; - } - - if (OMPI_SUCCESS != ompi_unpack(buffer, &num_tokens, 1, OMPI_INT32)) { - goto RETURN_ERROR; - } - - if (0 < num_tokens) { /* tokens provided */ - tokens = (char**)malloc((num_tokens+1)*sizeof(char*)); - tokptr = tokens; - for (i=0; i ompi_unpack_string(buffer, tokptr)) { - goto RETURN_ERROR; - } - tokptr++; - } - *tokptr = NULL; - } else { /* no tokens provided - wildcard case, just count entries on segment */ - tokens = NULL; - } - - if (OMPI_SUCCESS != ompi_unpack(buffer, &trigger, 1, OMPI_INT32)) { - goto RETURN_ERROR; - } - - /******* LOCK *****/ - OMPI_THREAD_LOCK(&mca_gpr_replica_mutex); - - response = (int32_t)gpr_replica_cancel_synchro_nl(synchro_mode, mode, - 
segment, tokens, trigger); - - OMPI_THREAD_UNLOCK(&mca_gpr_replica_mutex); - /****** UNLOCK ******/ - - if (OMPI_SUCCESS != ompi_pack(answer, &command, 1, MCA_GPR_OOB_PACK_CMD)) { - goto RETURN_ERROR; - } - - if (OMPI_SUCCESS != ompi_pack(answer, &response, 1, OMPI_INT32)) { - goto RETURN_ERROR; - } - - if (0 > mca_oob_send_packed(sender, answer, tag, 0)) { - /* RHC -- not sure what to do if the return send fails */ - } - - - /***** REGISTER *****/ - } else if (MCA_GPR_RTE_REGISTER_CMD == command) { - - if (0 > ompi_unpack_string(buffer, &contact_info)) { - goto RETURN_ERROR; - } - - - if (OMPI_SUCCESS != ompi_unpack(buffer, &num_procs, 1, OMPI_INT32)) { - goto RETURN_ERROR; - } - - - if (OMPI_SUCCESS != ompi_unpack(buffer, &pid, 1, OMPI_INT32)) { - goto RETURN_ERROR; - } - - - if (0 > ompi_unpack_string(buffer, &nodename)) { - goto RETURN_ERROR; - } - - - ompi_buffer_init(®_buffer, 0); - ompi_pack_string(reg_buffer, contact_info); - ompi_pack(reg_buffer, &pid, 1, OMPI_INT32); - ompi_pack_string(buffer, nodename); - - - if (OMPI_SUCCESS != ompi_unpack(buffer, &start_idtag, 1, OMPI_INT32)) { - goto RETURN_ERROR; - } - - if (OMPI_SUCCESS != ompi_unpack(buffer, &end_idtag, 1, OMPI_INT32)) { - goto RETURN_ERROR; - } - - - /******* LOCK *****/ - OMPI_THREAD_LOCK(&mca_gpr_replica_mutex); - - /* enter start request on notify tracking system */ - local_idtag1 = gpr_replica_enter_notify_request(sender, start_idtag, NULL, NULL); - - /* enter end request on notify tracking system */ - local_idtag2 = gpr_replica_enter_notify_request(sender, end_idtag, NULL, NULL); - - - /* do registration */ - response = (int32_t)gpr_replica_rte_register_nl(contact_info, buffer, - num_procs, local_idtag1, local_idtag2); - - - OMPI_THREAD_UNLOCK(&mca_gpr_replica_mutex); - /****** UNLOCK ******/ - - if (OMPI_SUCCESS != ompi_pack(answer, &command, 1, MCA_GPR_OOB_PACK_CMD)) { - goto RETURN_ERROR; - } - - if (OMPI_SUCCESS != ompi_pack(answer, &response, 1, OMPI_INT32)) { - goto RETURN_ERROR; - } - 
- if (0 > mca_oob_send_packed(sender, answer, tag, 0)) { - /* RHC -- not sure what to do if the return send fails */ - } - - /* process any resulting callbacks */ - gpr_replica_process_callbacks(); - - - - /***** UNREGISTER *****/ - } else if (MCA_GPR_RTE_UNREGISTER_CMD == command) { - - if (0 > ompi_unpack_string(buffer, &proc_name)) { - goto RETURN_ERROR; - } - - /******* LOCK *****/ - OMPI_THREAD_LOCK(&mca_gpr_replica_mutex); - - response = (int32_t)gpr_replica_rte_unregister_nl(proc_name); - - OMPI_THREAD_UNLOCK(&mca_gpr_replica_mutex); - /****** UNLOCK ******/ - - - if (OMPI_SUCCESS != ompi_pack(answer, &command, 1, MCA_GPR_OOB_PACK_CMD)) { - goto RETURN_ERROR; - } - - if (OMPI_SUCCESS != ompi_pack(answer, &response, 1, OMPI_INT32)) { - goto RETURN_ERROR; - } - - if (0 > mca_oob_send_packed(sender, answer, tag, 0)) { - /* RHC -- not sure what to do if the return send fails */ - } - - /* process any resulting callbacks */ - gpr_replica_process_callbacks(); - - - - /***** TEST INTERNALS *****/ - } else if (MCA_GPR_TEST_INTERNALS_CMD == command) { - - - if ((OMPI_SUCCESS != ompi_unpack(buffer, &test_level, 1, OMPI_INT32)) || - (0 > test_level)) { - goto RETURN_ERROR; - } - - returned_list = gpr_replica_test_internals(test_level); - - if (OMPI_SUCCESS != ompi_pack(answer, &command, 1, MCA_GPR_OOB_PACK_CMD)) { - goto RETURN_ERROR; - } - - response = (int32_t)ompi_list_get_size(returned_list); - if (OMPI_SUCCESS != ompi_pack(answer, &response, 1, OMPI_INT32)) { - goto RETURN_ERROR; - } - - if (0 < response) { /* don't send anything else back if the list is empty */ - for (testval = (ompi_registry_internal_test_results_t*)ompi_list_get_first(returned_list); - testval != (ompi_registry_internal_test_results_t*)ompi_list_get_end(returned_list); - testval = (ompi_registry_internal_test_results_t*)ompi_list_get_next(testval)) { /* traverse the list */ - if (OMPI_SUCCESS != ompi_pack_string(answer, testval->test)) { - goto RETURN_ERROR; - } - if (OMPI_SUCCESS != 
ompi_pack_string(answer, testval->message)) { - goto RETURN_ERROR; - } - } - } - if (0 > mca_oob_send_packed(sender, answer, tag, 0)) { - /* RHC -- not sure what to do if the return send fails */ - } - - /**** UNRECOGNIZED COMMAND ****/ - } else { /* got an unrecognized command */ - RETURN_ERROR: - ompi_buffer_init(&error_answer, 8); - command = MCA_GPR_ERROR; - ompi_pack(error_answer, (void*)&command, 1, MCA_GPR_OOB_PACK_CMD); - mca_oob_send_packed(sender, error_answer, tag, 0); - ompi_buffer_free(error_answer); - } - - ompi_buffer_free(answer); - - /* reissue the non-blocking receive */ - mca_oob_recv_packed_nb(MCA_OOB_NAME_ANY, MCA_OOB_TAG_GPR, 0, mca_gpr_replica_recv, NULL); -} - - -void gpr_replica_remote_notify(ompi_process_name_t *recipient, int recipient_tag, - ompi_registry_notify_message_t *message) -{ - ompi_buffer_t msg; - mca_gpr_cmd_flag_t command; - int32_t num_items, i; - ompi_registry_value_t *regval; - char **tokptr; - int recv_tag; - - if (mca_gpr_replica_debug) { - ompi_output(0, "sending trigger message"); - } - - command = MCA_GPR_NOTIFY_CMD; - recv_tag = MCA_OOB_TAG_GPR_NOTIFY; - - if (OMPI_SUCCESS != ompi_buffer_init(&msg, 0)) { - return; - } - - if (OMPI_SUCCESS != ompi_pack(msg, &command, 1, MCA_GPR_OOB_PACK_CMD)) { - return; - } - - i = (int32_t)recipient_tag; - if (OMPI_SUCCESS != ompi_pack(msg, &i, 1, OMPI_INT32)) { - return; - } - - if (OMPI_SUCCESS != ompi_pack(msg, &message->trig_action, 1, MCA_GPR_OOB_PACK_ACTION)) { - return; - } - - if (OMPI_SUCCESS != ompi_pack(msg, &message->trig_synchro, 1, MCA_GPR_OOB_PACK_SYNCHRO_MODE)) { - return; - } - - - num_items = (int32_t)ompi_list_get_size(&message->data); - if (OMPI_SUCCESS != ompi_pack(msg, &num_items, 1, OMPI_INT32)) { - return; - } - - if (0 < num_items) { /* don't send anything else back if the list is empty */ - while (NULL != (regval = (ompi_registry_value_t*)ompi_list_remove_first(&message->data))) { - if (OMPI_SUCCESS != ompi_pack(msg, ®val->object_size, 1, 
MCA_GPR_OOB_PACK_OBJECT_SIZE)) { - return; - } - if (OMPI_SUCCESS != ompi_pack(msg, regval->object, regval->object_size, OMPI_BYTE)) { - return; - } - /* TSW - should we add */ - /* OBJ_RELEASE(regval); */ - } - } - if (OMPI_SUCCESS != ompi_pack(msg, &message->num_tokens, 1, OMPI_INT32)) { - return; - } - - for (i=0, tokptr=message->tokens; i < message->num_tokens; i++, tokptr++) { - if (OMPI_SUCCESS != ompi_pack_string(msg, *tokptr)) { - return; - } - } - - if (0 > mca_oob_send_packed(recipient, msg, recv_tag, 0)) { - return; - } - - ompi_buffer_free(msg); - OBJ_RELEASE(message); -} diff --git a/src/mca/gpr/replica/gpr_replica_compound_cmd.c b/src/mca/gpr/replica/gpr_replica_compound_cmd.c new file mode 100644 index 0000000000..10944257e2 --- /dev/null +++ b/src/mca/gpr/replica/gpr_replica_compound_cmd.c @@ -0,0 +1,106 @@ +/* + * $HEADER$ + */ +/** @file: + * + * The Open MPI general purpose registry - implementation. + * + */ + +/* + * includes + */ + +#include "ompi_config.h" + +#include "gpr_replica.h" +#include "gpr_replica_internals.h" + + +int mca_gpr_replica_begin_compound_cmd(void) +{ + size_t size; + + OMPI_THREAD_LOCK(&mca_gpr_replica_wait_for_compound_mutex); + + while (mca_gpr_replica_compound_cmd_mode) { + mca_gpr_replica_compound_cmd_waiting++; + ompi_condition_wait(&mca_gpr_replica_compound_cmd_condition, &mca_gpr_replica_wait_for_compound_mutex); + mca_gpr_replica_compound_cmd_waiting--; + } + + mca_gpr_replica_compound_cmd_mode = true; + ompi_buffer_size(mca_gpr_replica_compound_cmd, &size); + if (0 < size) { + ompi_buffer_free(mca_gpr_replica_compound_cmd); + } + ompi_buffer_init(&mca_gpr_replica_compound_cmd, 0); + + OMPI_THREAD_UNLOCK(&mca_gpr_replica_wait_for_compound_mutex); + return OMPI_SUCCESS; +} + + +int mca_gpr_replica_stop_compound_cmd(void) +{ + size_t size; + + OMPI_THREAD_LOCK(&mca_gpr_replica_wait_for_compound_mutex); + + mca_gpr_replica_compound_cmd_mode = false; + ompi_buffer_size(mca_gpr_replica_compound_cmd, &size); + if (0 < 
size) { + ompi_buffer_free(mca_gpr_replica_compound_cmd); + } + + if (mca_gpr_replica_compound_cmd_waiting) { + ompi_condition_signal(&mca_gpr_replica_compound_cmd_condition); + } + + OMPI_THREAD_UNLOCK(&mca_gpr_replica_wait_for_compound_mutex); + return OMPI_SUCCESS; +} + + +ompi_list_t* mca_gpr_replica_exec_compound_cmd(bool return_requested) +{ + ompi_buffer_t results; + ompi_list_t *return_list=NULL; + size_t size; + bool compound_cmd_detected=false; + + if (mca_gpr_replica_debug) { + ompi_output(0, "[%d,%d,%d] Executing compound command", + OMPI_NAME_ARGS(*ompi_rte_get_self())); + } + + OMPI_THREAD_LOCK(&mca_gpr_replica_wait_for_compound_mutex); + + results = mca_gpr_replica_process_command_buffer(mca_gpr_replica_compound_cmd, + NULL, + &return_requested, + &compound_cmd_detected); + + if (return_requested) { + /* construct list of compound_value structs */ + } else { + return_list = NULL; + } + + ompi_buffer_free(results); + + mca_gpr_replica_compound_cmd_mode = false; + ompi_buffer_size(mca_gpr_replica_compound_cmd, &size); + if (0 < size) { + ompi_buffer_free(mca_gpr_replica_compound_cmd); + } + if (mca_gpr_replica_compound_cmd_waiting) { + ompi_condition_signal(&mca_gpr_replica_compound_cmd_condition); + } + + OMPI_THREAD_UNLOCK(&mca_gpr_replica_wait_for_compound_mutex); + + mca_gpr_replica_process_callbacks(); + + return return_list; +} diff --git a/src/mca/gpr/replica/gpr_replica_del_index.c b/src/mca/gpr/replica/gpr_replica_del_index.c new file mode 100644 index 0000000000..5e0652d80e --- /dev/null +++ b/src/mca/gpr/replica/gpr_replica_del_index.c @@ -0,0 +1,218 @@ +/* + * $HEADER$ + */ +/** @file: + * + * The Open MPI general purpose registry - implementation. 
+ * + */ + +/* + * includes + */ + +#include "ompi_config.h" + +#include "gpr_replica.h" +#include "gpr_replica_internals.h" + + +int mca_gpr_replica_delete_segment(char *segment) +{ + mca_gpr_replica_segment_t *seg; + + /* protect against errors */ + if (NULL == segment) { + return OMPI_ERROR; + } + + if (mca_gpr_replica_compound_cmd_mode) { + return mca_gpr_base_pack_delete_segment(mca_gpr_replica_compound_cmd, + mca_gpr_replica_silent_mode, segment); + } + + OMPI_THREAD_LOCK(&mca_gpr_replica_mutex); + + /* locate the segment */ + seg = mca_gpr_replica_find_seg(false, segment, MCA_NS_BASE_JOBID_MAX); + if (NULL == seg) { + return OMPI_ERROR; + } + + mca_gpr_replica_delete_segment_nl(seg); + + OMPI_THREAD_UNLOCK(&mca_gpr_replica_mutex); + return OMPI_SUCCESS; +} + + +void mca_gpr_replica_delete_segment_nl(mca_gpr_replica_segment_t *seg) +{ + + if (mca_gpr_replica_debug) { + ompi_output(0, "[%d,%d,%d] gpr replica: delete_segment entered", + OMPI_NAME_ARGS(*ompi_rte_get_self())); + } + + /* empty the segment storage */ + mca_gpr_replica_empty_segment(seg); + + /* remove segment name from global registry dictionary */ + mca_gpr_replica_delete_key(seg, NULL); + + return; +} + +int mca_gpr_replica_delete_object(ompi_registry_mode_t addr_mode, + char *segment, char **tokens) +{ + int rc; + mca_gpr_replica_segment_t *seg; + mca_gpr_replica_key_t *keys; + int num_keys; + + /* protect against errors */ + if (NULL == segment) { + return OMPI_ERROR; + } + + if (mca_gpr_replica_compound_cmd_mode) { + return mca_gpr_base_pack_delete_object(mca_gpr_replica_compound_cmd, + mca_gpr_replica_silent_mode, + addr_mode, segment, tokens); + } + + OMPI_THREAD_LOCK(&mca_gpr_replica_mutex); + + /* locate the segment */ + seg = mca_gpr_replica_find_seg(false, segment, ompi_name_server.get_jobid(ompi_rte_get_self())); + if (NULL == seg) { + return OMPI_ERROR; + } + + keys = mca_gpr_replica_get_key_list(seg, tokens, &num_keys); + + rc = mca_gpr_replica_delete_object_nl(addr_mode, seg, keys, 
num_keys); + + mca_gpr_replica_check_subscriptions(seg, MCA_GPR_REPLICA_OBJECT_DELETED); + + mca_gpr_replica_check_synchros(seg); + + if (NULL != keys) { + free(keys); + } + + OMPI_THREAD_UNLOCK(&mca_gpr_replica_mutex); + + mca_gpr_replica_process_callbacks(); + + return rc; +} + + +int mca_gpr_replica_delete_object_nl(ompi_registry_mode_t addr_mode, + mca_gpr_replica_segment_t *seg, + mca_gpr_replica_key_t *keys, + int num_keys) +{ + mca_gpr_replica_core_t *reg, *next; + int count; + mca_gpr_replica_trigger_list_t *trig; + + if (mca_gpr_replica_debug) { + ompi_output(0, "[%d,%d,%d] replica_delete_object entered: segment %s", + OMPI_NAME_ARGS(*ompi_rte_get_self()), seg->name); + } + + /* traverse the segment's registry, looking for matching tokens per the specified mode */ + count = 0; + for (reg = (mca_gpr_replica_core_t*)ompi_list_get_first(&seg->registry_entries); + reg != (mca_gpr_replica_core_t*)ompi_list_get_end(&seg->registry_entries); + ) { + + next = (mca_gpr_replica_core_t*)ompi_list_get_next(reg); + + /* for each registry entry, check the key list */ + if (mca_gpr_replica_check_key_list(addr_mode, num_keys, keys, + reg->num_keys, reg->keys)) { /* found the key(s) on the list */ + count++; + ompi_list_remove_item(&seg->registry_entries, &reg->item); + } + reg = next; + } + + + /* update trigger counters */ + for (trig = (mca_gpr_replica_trigger_list_t*)ompi_list_get_first(&seg->triggers); + trig != (mca_gpr_replica_trigger_list_t*)ompi_list_get_end(&seg->triggers); + trig = (mca_gpr_replica_trigger_list_t*)ompi_list_get_next(trig)) { + if (mca_gpr_replica_check_key_list(trig->addr_mode, trig->num_keys, trig->keys, + num_keys, keys)) { + trig->count = trig->count - count; + } + } + + return OMPI_SUCCESS; +} + +ompi_list_t* mca_gpr_replica_index(char *segment) +{ + ompi_list_t* list; + mca_gpr_replica_segment_t *seg; + + if (mca_gpr_replica_compound_cmd_mode) { + mca_gpr_base_pack_index(mca_gpr_replica_compound_cmd, segment); + return NULL; + } + + 
OMPI_THREAD_LOCK(&mca_gpr_replica_mutex); + + if (NULL == segment) { /* want global level index */ + seg = NULL; + } else { + /* locate the segment */ + seg = mca_gpr_replica_find_seg(false, segment, MCA_NS_BASE_JOBID_MAX); + if (NULL == seg) { + return NULL; + } + } + + list = mca_gpr_replica_index_nl(seg); + + OMPI_THREAD_UNLOCK(&mca_gpr_replica_mutex); + return list; +} + +ompi_list_t* mca_gpr_replica_index_nl(mca_gpr_replica_segment_t *seg) +{ + ompi_list_t *answer; + mca_gpr_replica_keytable_t *ptr; + ompi_registry_index_value_t *ans; + + if (mca_gpr_replica_debug) { + ompi_output(0, "[%d,%d,%d] gpr replica: index entered segment: %s", + OMPI_NAME_ARGS(*ompi_rte_get_self()), seg->name); + } + + answer = OBJ_NEW(ompi_list_t); + + if (NULL == seg) { /* looking for index of global registry */ + for (ptr = (mca_gpr_replica_keytable_t*)ompi_list_get_first(&mca_gpr_replica_head.segment_dict); + ptr != (mca_gpr_replica_keytable_t*)ompi_list_get_end(&mca_gpr_replica_head.segment_dict); + ptr = (mca_gpr_replica_keytable_t*)ompi_list_get_next(ptr)) { + ans = OBJ_NEW(ompi_registry_index_value_t); + ans->token = strdup(ptr->token); + ompi_list_append(answer, &ans->item); + } + } else { /* want index of specific segment */ + for (ptr = (mca_gpr_replica_keytable_t*)ompi_list_get_first(&seg->keytable); + ptr != (mca_gpr_replica_keytable_t*)ompi_list_get_end(&seg->keytable); + ptr = (mca_gpr_replica_keytable_t*)ompi_list_get_next(ptr)) { + ans = OBJ_NEW(ompi_registry_index_value_t); + ans->token = strdup(ptr->token); + ompi_list_append(answer, &ans->item); + } + + } + return answer; +} diff --git a/src/mca/gpr/replica/gpr_replica_dump.c b/src/mca/gpr/replica/gpr_replica_dump.c new file mode 100644 index 0000000000..5bb9dfb5f4 --- /dev/null +++ b/src/mca/gpr/replica/gpr_replica_dump.c @@ -0,0 +1,240 @@ +/* + * $HEADER$ + */ +/** @file: + * + * The Open MPI general purpose registry - implementation. 
+ * + */ + +/* + * includes + */ + +#include "ompi_config.h" + +#include "gpr_replica.h" +#include "gpr_replica_internals.h" + +static ompi_process_name_t* mca_gpr_replica_find_recipient(ompi_registry_notify_id_t idtag); + +static void mca_gpr_replica_dump_load_string(ompi_buffer_t buffer, char *tmp); + +void mca_gpr_replica_dump(int output_id) +{ + ompi_buffer_t buffer; + + if (mca_gpr_replica_debug) { + ompi_output(0, "[%d,%d,%d] gpr_replica_dump: entered for output on %d", + OMPI_NAME_ARGS(*ompi_rte_get_self()), output_id); + } + + OMPI_THREAD_LOCK(&mca_gpr_replica_mutex); + + if (mca_gpr_replica_compound_cmd_mode) { + mca_gpr_base_pack_dump(mca_gpr_replica_compound_cmd); + OMPI_THREAD_UNLOCK(&mca_gpr_replica_mutex); + return; + } + + if (OMPI_SUCCESS != ompi_buffer_init(&buffer, 0)) { + OMPI_THREAD_UNLOCK(&mca_gpr_replica_mutex); + return; + } + + mca_gpr_replica_dump_nl(buffer); + + OMPI_THREAD_UNLOCK(&mca_gpr_replica_mutex); + + mca_gpr_base_print_dump(buffer, output_id); + ompi_buffer_free(buffer); + +} + + +void mca_gpr_replica_dump_nl(ompi_buffer_t buffer) +{ + mca_gpr_replica_segment_t *seg; + mca_gpr_replica_core_t *reg; + mca_gpr_replica_key_t *key; + mca_gpr_replica_trigger_list_t *trig; + ompi_process_name_t *recip; + char *token, **tokptr; + int num_objects, num_trigs, cnt; + uint i; + char *tmp_out; + + /* loop through all segments */ + for (seg = (mca_gpr_replica_segment_t*)ompi_list_get_first(&mca_gpr_replica_head.registry); + seg != (mca_gpr_replica_segment_t*)ompi_list_get_end(&mca_gpr_replica_head.registry); + seg = (mca_gpr_replica_segment_t*)ompi_list_get_next(seg)) { + + asprintf(&tmp_out, "GPR Dump for Segment: %s\tOwner: %d", seg->name, (int)seg->owning_job); + mca_gpr_replica_dump_load_string(buffer, tmp_out); + + num_objects = ompi_list_get_size(&seg->registry_entries); + num_trigs = ompi_list_get_size(&seg->triggers); + + asprintf(&tmp_out, "\tNumber of objects: %d\tNumber of triggers: %d\n", num_objects, num_trigs); + 
mca_gpr_replica_dump_load_string(buffer, tmp_out); + + /* loop through all objects and print their tokens */ + cnt = 0; + for (reg = (mca_gpr_replica_core_t*)ompi_list_get_first(&seg->registry_entries); + reg != (mca_gpr_replica_core_t*)ompi_list_get_end(&seg->registry_entries); + reg = (mca_gpr_replica_core_t*)ompi_list_get_next(reg)) { + + asprintf(&tmp_out, "\tInfo for object %d\tObject size: %d", cnt, reg->object_size); + mca_gpr_replica_dump_load_string(buffer, tmp_out); + + /* parse the keys into tokens and print them */ + for (i=0, key=reg->keys; i < (uint)reg->num_keys; i++, key++) { + token = mca_gpr_replica_get_token(seg, *key); + if (NULL == token) { /* key couldn't be found */ + asprintf(&tmp_out, "\t\tKey num: %d - No entry found for key %X", + i, *key); + } else { + asprintf(&tmp_out, "\t\tKey num: %d - Key %d Token: %s", + i, *key, token); + free(token); + } + mca_gpr_replica_dump_load_string(buffer, tmp_out); + } + } + + /* loop through all triggers and print recipient name, type, and associated action */ + cnt = 0; + for (trig = (mca_gpr_replica_trigger_list_t*)ompi_list_get_first(&seg->triggers); + trig != (mca_gpr_replica_trigger_list_t*)ompi_list_get_end(&seg->triggers); + trig = (mca_gpr_replica_trigger_list_t*)ompi_list_get_next(trig)) { + + + if (OMPI_REGISTRY_SYNCHRO_MODE_NONE == trig->synch_mode) { /* subscription */ + asprintf(&tmp_out, "\tData for trigger %d\tType: SUBSCRIPTION", cnt); + mca_gpr_replica_dump_load_string(buffer, tmp_out); + asprintf(&tmp_out, "\t\tAssociated with notify number: %d",trig->local_idtag); + mca_gpr_replica_dump_load_string(buffer, tmp_out); + /* find recipient info from notify list */ + recip = mca_gpr_replica_find_recipient(trig->local_idtag); + if (NULL == recip) { + asprintf(&tmp_out, "\tIntended recipient: LOCAL"); + } else { + asprintf(&tmp_out, "\tIntended recipient: [%d,%d,%d]", OMPI_NAME_ARGS(*recip)); + } + mca_gpr_replica_dump_load_string(buffer, tmp_out); + ompi_pack_string(buffer, "\tActions:"); + 
if (OMPI_REGISTRY_NOTIFY_MODIFICATION & trig->action) { + ompi_pack_string(buffer, "\t\tOMPI_REGISTRY_NOTIFY_MODIFICATION"); + } + if (OMPI_REGISTRY_NOTIFY_ADD_SUBSCRIBER & trig->action) { + ompi_pack_string(buffer, "\t\tOMPI_REGISTRY_NOTIFY_ADD_SUBSCRIBER"); + } + if (OMPI_REGISTRY_NOTIFY_DELETE_ENTRY & trig->action) { + ompi_pack_string(buffer, "\t\tOMPI_REGISTRY_NOTIFY_DELETE_ENTRY"); + } + if (OMPI_REGISTRY_NOTIFY_ADD_ENTRY & trig->action) { + ompi_pack_string(buffer, "\t\tOMPI_REGISTRY_NOTIFY_ADD_ENTRY"); + } + if (OMPI_REGISTRY_NOTIFY_ON_STARTUP & trig->action) { + ompi_pack_string(buffer, "\t\tOMPI_REGISTRY_NOTIFY_ON_STARTUP"); + } + if (OMPI_REGISTRY_NOTIFY_ON_SHUTDOWN & trig->action) { + ompi_pack_string(buffer, "\t\tOMPI_REGISTRY_NOTIFY_ON_SHUTDOWN"); + } + if (OMPI_REGISTRY_NOTIFY_PRE_EXISTING & trig->action) { + ompi_pack_string(buffer, "\t\tOMPI_REGISTRY_NOTIFY_PRE_EXISTING"); + } + if (OMPI_REGISTRY_NOTIFY_INCLUDE_STARTUP_DATA & trig->action) { + ompi_pack_string(buffer, "\t\tOMPI_REGISTRY_NOTIFY_INCLUDE_STARTUP_DATA"); + } + if (OMPI_REGISTRY_NOTIFY_INCLUDE_SHUTDOWN_DATA & trig->action) { + ompi_pack_string(buffer, "\t\tOMPI_REGISTRY_NOTIFY_INCLUDE_SHUTDOWN_DATA"); + } + if (OMPI_REGISTRY_NOTIFY_ONE_SHOT & trig->action) { + ompi_pack_string(buffer, "\t\tOMPI_REGISTRY_NOTIFY_ONE_SHOT"); + } + + } else { /* synchro */ + asprintf(&tmp_out, "\tData for trigger %d\tType: SYNCHRO", cnt); + mca_gpr_replica_dump_load_string(buffer, tmp_out); + asprintf(&tmp_out, "\t\tAssociated with notify number: %d",trig->local_idtag); + mca_gpr_replica_dump_load_string(buffer, tmp_out); + + /* find recipient info from notify list */ + recip = mca_gpr_replica_find_recipient(trig->local_idtag); + if (NULL == recip) { + asprintf(&tmp_out, "\tIntended recipient: LOCAL"); + } else { + asprintf(&tmp_out, "\tIntended recipient: [%d,%d,%d]", OMPI_NAME_ARGS(*recip)); + } + mca_gpr_replica_dump_load_string(buffer, tmp_out); + ompi_pack_string(buffer, "\tSynchro Mode:"); + if 
(OMPI_REGISTRY_SYNCHRO_MODE_ASCENDING & trig->synch_mode) { + ompi_pack_string(buffer, "\t\tOMPI_REGISTRY_SYNCHRO_MODE_ASCENDING"); + } + if (OMPI_REGISTRY_SYNCHRO_MODE_DESCENDING & trig->synch_mode) { + ompi_pack_string(buffer, "\t\tOMPI_REGISTRY_SYNCHRO_MODE_DESCENDING"); + } + if (OMPI_REGISTRY_SYNCHRO_MODE_LEVEL & trig->synch_mode) { + ompi_pack_string(buffer, "\t\tOMPI_REGISTRY_SYNCHRO_MODE_LEVEL"); + } + if (OMPI_REGISTRY_SYNCHRO_MODE_GT_EQUAL & trig->synch_mode) { + ompi_pack_string(buffer, "\t\tOMPI_REGISTRY_SYNCHRO_MODE_GT_EQUAL"); + } + if (OMPI_REGISTRY_SYNCHRO_MODE_LT_EQUAL & trig->synch_mode) { + ompi_pack_string(buffer, "\t\tOMPI_REGISTRY_SYNCHRO_MODE_LT_EQUAL"); + } + if (OMPI_REGISTRY_SYNCHRO_MODE_CONTINUOUS & trig->synch_mode) { + ompi_pack_string(buffer, "\t\tOMPI_REGISTRY_SYNCHRO_MODE_CONTINUOUS"); + } + if (OMPI_REGISTRY_SYNCHRO_MODE_ONE_SHOT & trig->synch_mode) { + ompi_pack_string(buffer, "\t\tOMPI_REGISTRY_SYNCHRO_MODE_ONE_SHOT"); + } + if (OMPI_REGISTRY_SYNCHRO_MODE_STARTUP & trig->synch_mode) { + ompi_pack_string(buffer, "\t\tOMPI_REGISTRY_SYNCHRO_MODE_STARTUP"); + } + if (OMPI_REGISTRY_SYNCHRO_MODE_SHUTDOWN & trig->synch_mode) { + ompi_pack_string(buffer, "\t\tOMPI_REGISTRY_SYNCHRO_MODE_SHUTDOWN"); + } + asprintf(&tmp_out, "\tTrigger level: %d\tCurrent count: %d", trig->trigger, trig->count); + mca_gpr_replica_dump_load_string(buffer, tmp_out); + asprintf(&tmp_out, "\tTransition status: %d", trig->above_below); + mca_gpr_replica_dump_load_string(buffer, tmp_out); + } + asprintf(&tmp_out, "\tAddressing mode: %X\tNumber of tokens: %d", trig->addr_mode, trig->num_keys); + mca_gpr_replica_dump_load_string(buffer, tmp_out); + + for (i=0, tokptr=trig->tokens; i < trig->num_keys; i++, tokptr++) { + asprintf(&tmp_out, "\t\tToken: %s", *tokptr); + mca_gpr_replica_dump_load_string(buffer, tmp_out); + } + ompi_pack_string(buffer, "\n"); + cnt++; + } + ompi_pack_string(buffer, "\n\n"); + } + + return; +} + + +static ompi_process_name_t 
*mca_gpr_replica_find_recipient(ompi_registry_notify_id_t idtag) +{ + mca_gpr_replica_notify_request_tracker_t *trackptr; + + for (trackptr = (mca_gpr_replica_notify_request_tracker_t*)ompi_list_get_first(&mca_gpr_replica_notify_request_tracker); + trackptr != (mca_gpr_replica_notify_request_tracker_t*)ompi_list_get_end(&mca_gpr_replica_notify_request_tracker); + trackptr = (mca_gpr_replica_notify_request_tracker_t*)ompi_list_get_next(trackptr)) { + if (trackptr->local_idtag == idtag) { + return trackptr->requestor; + } + } + return NULL; +} + + +static void mca_gpr_replica_dump_load_string(ompi_buffer_t buffer, char *tmp) +{ + ompi_pack_string(buffer, tmp); + free(tmp); +} diff --git a/src/mca/gpr/replica/gpr_replica_internals.c b/src/mca/gpr/replica/gpr_replica_internals.c deleted file mode 100644 index a0ec73d2a9..0000000000 --- a/src/mca/gpr/replica/gpr_replica_internals.c +++ /dev/null @@ -1,1063 +0,0 @@ -/* - * $HEADER$ - */ -/** @file: - * - * The Open MPI general purpose registry - support functions. 
- * - */ - -/* - * includes - */ - -#include "ompi_config.h" - -#include -#include -#include -#ifdef HAVE_UNISTD_H -#include -#endif -#ifdef HAVE_LIBGEN_H -#include -#endif - -#include "include/constants.h" - -#include "util/output.h" -#include "util/printf.h" -#include "util/proc_info.h" -#include "mca/mca.h" -#include "mca/oob/base/base.h" -#include "mca/gpr/base/base.h" -#include "gpr_replica.h" -#include "gpr_replica_internals.h" -#include "runtime/runtime.h" - - -/* - * - */ - -mca_gpr_replica_segment_t *gpr_replica_define_segment(char *segment) -{ - mca_gpr_replica_segment_t *seg; - mca_gpr_replica_key_t key; - - - key = gpr_replica_define_key(segment, NULL); - if (MCA_GPR_REPLICA_KEY_MAX == key) { /* got some kind of error code */ - return NULL; - } - - /* need to add the segment to the registry */ - seg = OBJ_NEW(mca_gpr_replica_segment_t); - seg->segment = key; - ompi_list_append(&mca_gpr_replica_head.registry, &seg->item); - - - return seg; -} - - -mca_gpr_replica_segment_t *gpr_replica_find_seg(bool create, char *segment) -{ - mca_gpr_replica_keytable_t *ptr_seg; - mca_gpr_replica_segment_t *seg; - - - /* search the registry segments to find which one is being referenced */ - for (ptr_seg = (mca_gpr_replica_keytable_t*)ompi_list_get_first(&mca_gpr_replica_head.segment_dict); - ptr_seg != (mca_gpr_replica_keytable_t*)ompi_list_get_end(&mca_gpr_replica_head.segment_dict); - ptr_seg = (mca_gpr_replica_keytable_t*)ompi_list_get_next(ptr_seg)) { - if (0 == strcmp(segment, ptr_seg->token)) { - /* search mca_gpr_replica_head to find segment */ - for (seg=(mca_gpr_replica_segment_t*)ompi_list_get_first(&mca_gpr_replica_head.registry); - seg != (mca_gpr_replica_segment_t*)ompi_list_get_end(&mca_gpr_replica_head.registry); - seg = (mca_gpr_replica_segment_t*)ompi_list_get_next(seg)) { - if(seg->segment == ptr_seg->key) { - return(seg); - } - } - } - } - - - if (create) { - /* didn't find the dictionary entry - create it */ - return 
gpr_replica_define_segment(segment); - } - return NULL; /* don't create it - just return NULL */ -} - -mca_gpr_replica_keytable_t *gpr_replica_find_dict_entry(char *segment, char *token) -{ - mca_gpr_replica_keytable_t *ptr_seg; - mca_gpr_replica_keytable_t *ptr_key; - mca_gpr_replica_segment_t *seg; - - - /* search the registry segments to find which one is being referenced */ - for (ptr_seg = (mca_gpr_replica_keytable_t*)ompi_list_get_first(&mca_gpr_replica_head.segment_dict); - ptr_seg != (mca_gpr_replica_keytable_t*)ompi_list_get_end(&mca_gpr_replica_head.segment_dict); - ptr_seg = (mca_gpr_replica_keytable_t*)ompi_list_get_next(ptr_seg)) { - if (0 == strcmp(segment, ptr_seg->token)) { - if (NULL == token) { /* just want segment token-key pair */ - return(ptr_seg); - } - /* search registry to find segment */ - for (seg=(mca_gpr_replica_segment_t*)ompi_list_get_first(&mca_gpr_replica_head.registry); - seg != (mca_gpr_replica_segment_t*)ompi_list_get_end(&mca_gpr_replica_head.registry); - seg = (mca_gpr_replica_segment_t*)ompi_list_get_next(seg)) { - if(seg->segment == ptr_seg->key) { - /* got segment - now find specified token-key pair in that dictionary */ - for (ptr_key = (mca_gpr_replica_keytable_t*)ompi_list_get_first(&seg->keytable); - ptr_key != (mca_gpr_replica_keytable_t*)ompi_list_get_end(&seg->keytable); - ptr_key = (mca_gpr_replica_keytable_t*)ompi_list_get_next(ptr_key)) { - if (0 == strcmp(token, ptr_key->token)) { - return(ptr_key); - } - } - return(NULL); /* couldn't find the specified entry */ - } - } - return(NULL); /* couldn't find segment, even though we found entry in registry dict */ - } - } - return(NULL); /* couldn't find segment token-key pair */ -} - - -mca_gpr_replica_key_t gpr_replica_get_key(char *segment, char *token) -{ - mca_gpr_replica_keytable_t *ptr_key; - - /* find registry segment */ - ptr_key = gpr_replica_find_dict_entry(segment, NULL); - if (NULL != ptr_key) { - if (NULL == token) { /* only want segment key */ - 
return(ptr_key->key); - } - /* if token specified, find the dictionary entry that matches token */ - ptr_key = gpr_replica_find_dict_entry(segment, token); - if (NULL != ptr_key) { - return(ptr_key->key); - } - return MCA_GPR_REPLICA_KEY_MAX; /* couldn't find dictionary entry */ - } - return MCA_GPR_REPLICA_KEY_MAX; /* couldn't find segment */ -} - - -char *gpr_replica_get_token(char *segment, mca_gpr_replica_key_t key) -{ - mca_gpr_replica_segment_t *seg; - mca_gpr_replica_keytable_t *ptr_key; - char *answer; - - if (NULL == segment) { - return NULL; - } - - seg = gpr_replica_find_seg(false, segment); - if (NULL == seg) { - return NULL; - } - - - /* find the matching key */ - for (ptr_key = (mca_gpr_replica_keytable_t*)ompi_list_get_first(&seg->keytable); - ptr_key != (mca_gpr_replica_keytable_t*)ompi_list_get_end(&seg->keytable); - ptr_key = (mca_gpr_replica_keytable_t*)ompi_list_get_next(ptr_key)) { - if (key == ptr_key->key) { - answer = strdup(ptr_key->token); - return answer; - } - } - return(NULL); /* couldn't find the specified entry */ -} - -ompi_list_t *gpr_replica_get_key_list(char *segment, char **tokens) -{ - ompi_list_t *keys; - char **token; - mca_gpr_replica_keytable_t *keyptr; - - token = tokens; - keys = OBJ_NEW(ompi_list_t); - - /* protect against errors */ - if (NULL == segment || NULL == tokens) { - return keys; - } - - while (NULL != *token) { /* traverse array of tokens until NULL */ - keyptr = OBJ_NEW(mca_gpr_replica_keytable_t); - keyptr->token = strdup(*token); - keyptr->key = gpr_replica_get_key(segment, *token); - ompi_list_append(keys, &keyptr->item); - token++; - } - return keys; -} - -mca_gpr_replica_key_t gpr_replica_define_key(char *segment, char *token) -{ - mca_gpr_replica_segment_t *seg; - mca_gpr_replica_keytable_t *ptr_seg, *ptr_key, *novel; - - /* protect against errors */ - if (NULL == segment) { - return OMPI_ERROR; - } - - /* if token is NULL, then this is defining a segment name. 
Check dictionary to ensure uniqueness */ - if (NULL == token) { - - - for (ptr_seg = (mca_gpr_replica_keytable_t*)ompi_list_get_first(&mca_gpr_replica_head.segment_dict); - ptr_seg != (mca_gpr_replica_keytable_t*)ompi_list_get_end(&mca_gpr_replica_head.segment_dict); - ptr_seg = (mca_gpr_replica_keytable_t*)ompi_list_get_next(ptr_seg)) { - if (0 == strcmp(segment, ptr_seg->token)) { - return ptr_seg->key; - } - } - - /* okay, name is not previously taken. Define a key value for it and return */ - novel = OBJ_NEW(mca_gpr_replica_keytable_t); - novel->token = strdup(segment); - if (0 == ompi_list_get_size(&mca_gpr_replica_head.freekeys)) { /* no keys waiting for reuse */ - if (MCA_GPR_REPLICA_KEY_MAX-2 > mca_gpr_replica_head.lastkey) { /* have a key left */ - mca_gpr_replica_head.lastkey++; - novel->key = mca_gpr_replica_head.lastkey; - } else { /* out of keys */ - return MCA_GPR_REPLICA_KEY_MAX; - } - } else { - ptr_key = (mca_gpr_replica_keytable_t*)ompi_list_remove_first(&mca_gpr_replica_head.freekeys); - novel->key = ptr_key->key; - } - ompi_list_append(&mca_gpr_replica_head.segment_dict, &novel->item); - return novel->key; - } - - /* okay, token is specified */ - /* search the registry segments to find which one is being referenced */ - seg = gpr_replica_find_seg(true, segment); - if (NULL != seg) { - /* using that segment, check dictionary to ensure uniqueness */ - for (ptr_key = (mca_gpr_replica_keytable_t*)ompi_list_get_first(&seg->keytable); - ptr_key != (mca_gpr_replica_keytable_t*)ompi_list_get_end(&seg->keytable); - ptr_key = (mca_gpr_replica_keytable_t*)ompi_list_get_next(ptr_key)) { - if (0 == strcmp(token, ptr_key->token)) { - return ptr_key->key; /* already taken, report value */ - } - } - /* okay, token is unique - create dictionary entry */ - novel = OBJ_NEW(mca_gpr_replica_keytable_t); - novel->token = strdup(token); - if (0 == ompi_list_get_size(&seg->freekeys)) { /* no keys waiting for reuse */ - seg->lastkey++; - novel->key = seg->lastkey; - } 
else { - ptr_key = (mca_gpr_replica_keytable_t*)ompi_list_remove_first(&seg->freekeys); - novel->key = ptr_key->key; - } - ompi_list_append(&seg->keytable, &novel->item); - return novel->key; - } - /* couldn't find segment */ - return MCA_GPR_REPLICA_KEY_MAX; -} - -int gpr_replica_delete_key(char *segment, char *token) -{ - mca_gpr_replica_segment_t *seg; - mca_gpr_replica_core_t *reg; - mca_gpr_replica_keytable_t *ptr_seg, *ptr_key, *novel; - mca_gpr_replica_key_t *key; - int i; - - /* protect ourselves against errors */ - if (NULL == segment) { - return(OMPI_ERROR); - } - - - /* find the segment */ - seg = gpr_replica_find_seg(false, segment); - if (NULL != seg) { - - /* if specified token is NULL, then this is deleting a segment name.*/ - if (NULL == token) { - if (OMPI_SUCCESS != gpr_replica_empty_segment(seg)) { /* couldn't empty segment */ - return OMPI_ERROR; - } - /* now remove the dictionary entry from the global registry dictionary*/ - ptr_seg = gpr_replica_find_dict_entry(segment, NULL); - if (NULL == ptr_seg) { /* failed to find dictionary entry */ - return OMPI_ERROR; - } - - /* add key to global registry's freekey list */ - novel = OBJ_NEW(mca_gpr_replica_keytable_t); - novel->token = NULL; - novel->key = ptr_seg->key; - ompi_list_append(&mca_gpr_replica_head.freekeys, &novel->item); - - /* remove the dictionary entry */ - ompi_list_remove_item(&mca_gpr_replica_head.segment_dict, &ptr_seg->item); - - - return(OMPI_SUCCESS); - - } else { /* token not null, so need to find dictionary element to delete */ - ptr_key = gpr_replica_find_dict_entry(segment, token); - if (NULL != ptr_key) { - /* found key in dictionary */ - /* need to search this segment's registry to find all instances of key & "delete" them */ - for (reg = (mca_gpr_replica_core_t*)ompi_list_get_first(&seg->registry_entries); - reg != (mca_gpr_replica_core_t*)ompi_list_get_end(&seg->registry_entries); - reg = (mca_gpr_replica_core_t*)ompi_list_get_next(reg)) { - - /* check the key list */ - 
for (i=0, key=reg->keys; i < reg->num_keys; i++, key++) { - if (ptr_key->key == *key) { /* found match */ - *key = MCA_GPR_REPLICA_KEY_MAX; - } - } - - /* add key to this segment's freekey list */ - novel = OBJ_NEW(mca_gpr_replica_keytable_t); - novel->token = NULL; - novel->key = ptr_key->key; - ompi_list_append(&seg->freekeys, &novel->item); - - /* now remove the dictionary entry from the segment's dictionary */ - ompi_list_remove_item(&seg->keytable, &ptr_key->item); - return(OMPI_SUCCESS); - } - } - return(OMPI_ERROR); /* if we get here, then we couldn't find token in dictionary */ - } - } - return(OMPI_ERROR); /* if we get here, then we couldn't find segment */ -} - -int gpr_replica_empty_segment(mca_gpr_replica_segment_t *seg) -{ - mca_gpr_replica_core_t *ptr; - mca_gpr_replica_keytable_t *keytab; - mca_gpr_replica_keylist_t *keylst; - - /* need to free memory from each entry - remove_last returns pointer to the entry */ - - - /* empty the segment's registry */ - while (!ompi_list_is_empty(&seg->registry_entries)) { - ptr = (mca_gpr_replica_core_t*)ompi_list_remove_first(&seg->registry_entries); - OBJ_RELEASE(ptr); - } - - /* empty the segment's dictionary */ - while (!ompi_list_is_empty(&seg->keytable)) { - keytab = (mca_gpr_replica_keytable_t*)ompi_list_remove_first(&seg->keytable); - OBJ_RELEASE(keytab); - } - - /* empty the list of free keys */ - while (!ompi_list_is_empty(&seg->freekeys)) { - keylst = (mca_gpr_replica_keylist_t*)ompi_list_remove_first(&seg->freekeys); - OBJ_RELEASE(keylst); - } - /* now remove segment from global registry */ - ompi_list_remove_item(&mca_gpr_replica_head.registry, &seg->item); - OBJ_RELEASE(seg); - - - return OMPI_SUCCESS; -} - -/* - * A mode of "NONE" or "OVERWRITE" defaults to "XAND" behavior - */ -bool gpr_replica_check_key_list(ompi_registry_mode_t addr_mode, - mca_gpr_replica_key_t num_keys_search, mca_gpr_replica_key_t *keys, - mca_gpr_replica_key_t num_keys_entry, mca_gpr_replica_key_t *entry_keys) -{ - 
mca_gpr_replica_key_t *key1, *key2; - int num_found; - bool exclusive, no_match; - int i, j; - - /* check for trivial case */ - if (NULL == keys) { /* wildcard case - automatically true */ - return true; - } - - if (OMPI_REGISTRY_NONE == addr_mode || - OMPI_REGISTRY_OVERWRITE == addr_mode) { /* set default behavior for search */ - addr_mode = OMPI_REGISTRY_XAND; - } - - /* take care of trivial cases that don't require search */ - if ((OMPI_REGISTRY_XAND & addr_mode) && - (num_keys_search != num_keys_entry)) { /* can't possibly turn out "true" */ - return false; - } - - if ((OMPI_REGISTRY_AND & addr_mode) && - (num_keys_search > num_keys_entry)) { /* can't find enough matches */ - return false; - } - - /* okay, have to search for remaining possibilities */ - num_found = 0; - exclusive = true; - for (i=0, key1=entry_keys; i < num_keys_entry; i++, key1++) { - no_match = true; - for (j=0, key2=keys; j < num_keys_search; j++, key2++) { - if (*key1 == *key2) { /* found a match */ - num_found++; - no_match = false; - if (OMPI_REGISTRY_OR & addr_mode) { /* only need one match */ - return true; - } - } - } - if (no_match) { - exclusive = false; - } - } - - if (OMPI_REGISTRY_XAND & addr_mode) { /* deal with XAND case */ - if (num_found == num_keys_entry) { /* found all, and nothing more */ - return true; - } else { /* found either too many or not enough */ - return false; - } - } - - if (OMPI_REGISTRY_XOR & addr_mode) { /* deal with XOR case */ - if (num_found > 0 && exclusive) { /* found at least one and nothing not on list */ - return true; - } else { - return false; - } - } - - if (OMPI_REGISTRY_AND & addr_mode) { /* deal with AND case */ - if (num_found == num_keys_search) { /* found all the required keys */ - return true; - } else { - return false; - } - } - - /* should be impossible situation, but just to be safe... 
*/ - return false; -} - -mca_gpr_replica_trigger_list_t* -gpr_replica_construct_trigger(ompi_registry_synchro_mode_t synchro_mode, - ompi_registry_notify_action_t action, - ompi_registry_mode_t addr_mode, - char *segment, char **tokens, int trigger, - mca_gpr_notify_id_t id_tag) -{ - mca_gpr_replica_segment_t *seg; - mca_gpr_replica_core_t *reg; - mca_gpr_replica_trigger_list_t *trig; - char **tokptr, **tok2; - mca_gpr_replica_key_t *keyptr; - int i, num_tokens; - - - seg = gpr_replica_find_seg(true, segment); - if (NULL == seg) { /* couldn't find or create segment */ - return NULL; - } - - trig = OBJ_NEW(mca_gpr_replica_trigger_list_t); - - trig->synch_mode = synchro_mode; - trig->action = action; - trig->addr_mode = addr_mode; - trig->trigger = trigger; - trig->count = 0; - trig->id_tag = id_tag; - - trig->num_keys = 0; - trig->keys = NULL; - - if (NULL != tokens) { /* tokens provided */ - - /* count number of tokens */ - tokptr = tokens; - num_tokens = 0; - while (NULL != tokptr && NULL != *tokptr) { - num_tokens++; - tokptr++; - } - /* get memory for the keys and the tokens */ - trig->keys = (mca_gpr_replica_key_t*)malloc(num_tokens*sizeof(mca_gpr_replica_key_t)); - keyptr = trig->keys; - trig->tokens = (char**)malloc((num_tokens+1)*(sizeof(char*))); - tok2 = trig->tokens; - /* store tokens and key values of tokens, defining them if needed */ - for (i=0, tokptr=tokens; NULL != tokptr && NULL != *tokptr; i++, tokptr++) { - *keyptr = gpr_replica_get_key(segment, *tokptr); - if (MCA_GPR_REPLICA_KEY_MAX == *keyptr) { - *keyptr = gpr_replica_define_key(segment, *tokptr); - } - keyptr++; - *tok2 = strdup(*tokptr); - tok2++; - } - trig->num_keys = num_tokens; - *tok2 = NULL; - } - - /* traverse segment entries and initialize trigger count */ - for (reg = (mca_gpr_replica_core_t*)ompi_list_get_first(&seg->registry_entries); - reg != (mca_gpr_replica_core_t*)ompi_list_get_end(&seg->registry_entries); - reg = (mca_gpr_replica_core_t*)ompi_list_get_next(reg)) { - if 
(gpr_replica_check_key_list(addr_mode, trig->num_keys, trig->keys, - reg->num_keys, reg->keys)) { - trig->count++; - } - } - - /* check synchro levels */ - if (OMPI_REGISTRY_SYNCHRO_MODE_NONE != trig->synch_mode) { /* looking at synchro event */ - if (trig->count > trig->trigger) { - trig->above_below = MCA_GPR_REPLICA_TRIGGER_ABOVE_LEVEL; - } else if (trig->count < trig->trigger) { - trig->above_below = MCA_GPR_REPLICA_TRIGGER_BELOW_LEVEL; - } else { - trig->above_below = MCA_GPR_REPLICA_TRIGGER_AT_LEVEL; - } - } - - - ompi_list_append(&seg->triggers, &trig->item); - - - return trig; - -} - -mca_gpr_notify_id_t gpr_replica_remove_trigger(ompi_registry_synchro_mode_t synchro_mode, - ompi_registry_notify_action_t action, - ompi_registry_mode_t addr_mode, - char *segment, char **tokens, int trigger) -{ - mca_gpr_replica_segment_t *seg=NULL; - mca_gpr_replica_trigger_list_t *trig=NULL; - mca_gpr_notify_id_t id_tag=0; - char **tokptr=NULL; - mca_gpr_replica_key_t *keys=NULL, *keyptr=NULL, *kptr=NULL; - int i=0, num_tokens=0; - bool found=false, mismatch=false; - - - seg = gpr_replica_find_seg(false, segment); - if (NULL == seg) { /* couldn't find segment */ - return MCA_GPR_NOTIFY_ID_MAX; - } - - found = false; - num_tokens = 0; - - if (NULL != tokens) { /* tokens provided */ - - /* count number of tokens */ - tokptr = tokens; - num_tokens = 0; - while (NULL != tokptr && NULL != *tokptr) { - num_tokens++; - tokptr++; - } - keys = (mca_gpr_replica_key_t*)malloc(num_tokens*sizeof(mca_gpr_replica_key_t)); - keyptr = keys; - /* store key values of tokens - any undefined means error */ - for (i=0, tokptr=tokens; NULL != tokptr && NULL != *tokptr; i++, tokptr++) { - *keyptr = gpr_replica_get_key(segment, *tokptr); - if (MCA_GPR_REPLICA_KEY_MAX == *keyptr) { - goto CLEANUP; - } - keyptr++; - } - } - - - /* search segment's trigger list for specified trigger event */ - for (trig = (mca_gpr_replica_trigger_list_t*)ompi_list_get_first(&seg->triggers); - trig != 
(mca_gpr_replica_trigger_list_t*)ompi_list_get_end(&seg->triggers) && !found; - trig = (mca_gpr_replica_trigger_list_t*)ompi_list_get_next(trig)) { - if (trig->synch_mode == synchro_mode && - trig->action == action && - trig->addr_mode == addr_mode && - trig->trigger == trigger && - trig->num_keys == num_tokens) { /* all else matches - check keys */ - mismatch = false; - for (i=0, keyptr=keys, kptr=trig->keys; i < num_tokens && !mismatch; i++, keyptr++, kptr++) { - if (*keyptr != *kptr) { - mismatch = true; - } - } - if (!mismatch) { - found = true; - } - } - } - - CLEANUP: - if (NULL != keys) { - free(keys); - keys = NULL; - } - - if (found) { - id_tag = trig->id_tag; - ompi_list_remove_item(&seg->triggers, &trig->item); - OBJ_RELEASE(trig); - return id_tag; - } - - - return MCA_GPR_NOTIFY_ID_MAX; -} - - -ompi_registry_notify_message_t *gpr_replica_construct_notify_message(ompi_registry_mode_t addr_mode, - char *segment, char **tokens) -{ - ompi_list_t *reg_entries; - ompi_registry_value_t *reg, *obj; - ompi_registry_notify_message_t *msg; - char **tokptr, **tokptr2; - int num_tokens, i; - - if (mca_gpr_replica_debug) { - ompi_output(0, "trigger fired on segment %s", segment); - } - - /* protect against errors */ - if (NULL == segment) { - return NULL; - } - - reg_entries = gpr_replica_get_nl(addr_mode, segment, tokens); - - /* compute number of tokens */ - tokptr = tokens; - num_tokens = 0; - while (tokptr && NULL != *tokptr) { - num_tokens++; - tokptr++; - } - - msg = OBJ_NEW(ompi_registry_notify_message_t); - msg->num_tokens = num_tokens; - if(num_tokens) { - msg->tokens = (char**)malloc(num_tokens*(sizeof(char*))); - } else { - msg->tokens = NULL; - } - tokptr = tokens; - tokptr2 = msg->tokens; - for (i=0, tokptr=tokens, tokptr2=msg->tokens; - i < num_tokens; - i++, tokptr++, tokptr2++) { - *tokptr2 = strdup(*tokptr); - } - - while (NULL != (reg = (ompi_registry_value_t*)ompi_list_remove_first(reg_entries))) { - obj = OBJ_NEW(ompi_registry_value_t); - 
obj->object = (ompi_registry_object_t)malloc(reg->object_size); - memcpy(obj->object, reg->object, reg->object_size); - obj->object_size = reg->object_size; - ompi_list_append(&msg->data, &obj->item); - OBJ_RELEASE(reg); - } - - OBJ_RELEASE(reg_entries); - if (mca_gpr_replica_debug) { - ompi_output(0, "[%d,%d,%d] gpr replica-construct_notify: msg built", ompi_rte_get_self()->cellid, - ompi_rte_get_self()->jobid, ompi_rte_get_self()->vpid); - } - - return msg; -} - -bool gpr_replica_process_triggers(char *segment, - mca_gpr_replica_trigger_list_t *trig, - ompi_registry_notify_message_t *message) -{ - mca_gpr_replica_segment_t *seg; - mca_gpr_notify_request_tracker_t *trackptr; - bool found; - mca_gpr_replica_callbacks_t *cb; - - if (mca_gpr_replica_debug) { - ompi_output(0, "[%d,%d,%d] gpr replica: process_trig entered", ompi_rte_get_self()->cellid, - ompi_rte_get_self()->jobid, ompi_rte_get_self()->vpid); - } - - /* protect against errors */ - if (NULL == message || NULL == segment) { - return true; - } - - - seg = gpr_replica_find_seg(false, segment); - if (NULL == seg) { /* couldn't find segment */ - return true; - } - - if (mca_gpr_replica_debug) { - ompi_output(0, "[%d,%d,%d] gpr replica-process_trig: segment found", ompi_rte_get_self()->cellid, - ompi_rte_get_self()->jobid, ompi_rte_get_self()->vpid); - } - - - /* find corresponding notify request */ - found = false; - for (trackptr = (mca_gpr_notify_request_tracker_t*)ompi_list_get_first(&mca_gpr_replica_notify_request_tracker); - trackptr != (mca_gpr_notify_request_tracker_t*)ompi_list_get_end(&mca_gpr_replica_notify_request_tracker); - trackptr = (mca_gpr_notify_request_tracker_t*)ompi_list_get_next(trackptr)) { - if (trackptr->id_tag == trig->id_tag) { - found = true; - break; - } - } - - if (!found) { /* didn't find request */ - ompi_output(0, "Notification error - request not found"); - /* OMPI_THREAD_UNLOCK(&mca_gpr_replica_internals_mutex); */ - return true; - } - - /* process request */ - cb = 
OBJ_NEW(mca_gpr_replica_callbacks_t); - if (NULL == trackptr->requestor) { /* local request - queue callback fn with their tag */ - cb->cb_func = trackptr->callback; - cb->message = message; - cb->user_tag = trackptr->user_tag; - - } else { /* remote request - queue remote callback */ - cb->requestor = ompi_name_server.copy_process_name(trackptr->requestor); - cb->remote_idtag = trackptr->req_tag; - cb->message = message; - } - ompi_list_append(&mca_gpr_replica_callbacks, &cb->item); - - /* if one-shot, remove request from tracking system */ - if (OMPI_REGISTRY_SYNCHRO_MODE_ONE_SHOT & trig->synch_mode) { - ompi_list_remove_item(&mca_gpr_replica_notify_request_tracker, &trackptr->item); - OBJ_RELEASE(trackptr); - - /* ....and from the corresponding registry segment */ - ompi_list_remove_item(&seg->triggers, &trig->item); - OBJ_RELEASE(trig); - } - if (mca_gpr_replica_debug) { - ompi_output(0, "[%d,%d,%d] gpr replica-process_trig: complete", ompi_rte_get_self()->cellid, - ompi_rte_get_self()->jobid, ompi_rte_get_self()->vpid); - } - - return false; - - -} - - -void gpr_replica_process_callbacks(void) -{ - mca_gpr_replica_callbacks_t *cb; - - while (NULL != (cb = (mca_gpr_replica_callbacks_t*)ompi_list_remove_first(&mca_gpr_replica_callbacks))) { - if (NULL == cb->requestor) { /* local callback */ - if (mca_gpr_replica_debug) { - ompi_output(0, "process_callbacks: local"); - } - cb->cb_func(cb->message, cb->user_tag); - } else { /* remote request - send message back */ - if (mca_gpr_replica_debug) { - ompi_output(0, "process_callbacks: remote to [%d,%d,%d]", cb->requestor->cellid, - cb->requestor->jobid, cb->requestor->vpid); - } - gpr_replica_remote_notify(cb->requestor, cb->remote_idtag, cb->message); - } - OBJ_RELEASE(cb); - } -} - - -mca_gpr_notify_id_t gpr_replica_enter_notify_request(ompi_process_name_t *requestor, - mca_gpr_notify_id_t idtag, - ompi_registry_notify_cb_fn_t cb_func, - void *user_tag) -{ - mca_gpr_notify_request_tracker_t *trackptr; - 
mca_gpr_idtag_list_t *ptr_free_id; - - trackptr = OBJ_NEW(mca_gpr_notify_request_tracker_t); - trackptr->requestor = ompi_name_server.copy_process_name(requestor); - trackptr->req_tag = idtag; - trackptr->callback = cb_func; - trackptr->user_tag = user_tag; - if (ompi_list_is_empty(&mca_gpr_replica_free_notify_id_tags)) { - trackptr->id_tag = mca_gpr_replica_last_notify_id_tag; - mca_gpr_replica_last_notify_id_tag++; - } else { - ptr_free_id = (mca_gpr_idtag_list_t*)ompi_list_remove_first(&mca_gpr_replica_free_notify_id_tags); - trackptr->id_tag = ptr_free_id->id_tag; - } - ompi_list_append(&mca_gpr_replica_notify_request_tracker, &trackptr->item); - - return trackptr->id_tag; -} - - -mca_gpr_notify_id_t gpr_replica_remove_notify_request(mca_gpr_notify_id_t idtag) -{ - mca_gpr_notify_request_tracker_t *trackptr; - mca_gpr_idtag_list_t *ptr_free_id; - mca_gpr_notify_id_t ret_value; - - /* find request on replica notify tracking system */ - for (trackptr = (mca_gpr_notify_request_tracker_t*)ompi_list_get_first(&mca_gpr_replica_notify_request_tracker); - trackptr != (mca_gpr_notify_request_tracker_t*)ompi_list_get_end(&mca_gpr_replica_notify_request_tracker) && - trackptr->id_tag != idtag; - trackptr = (mca_gpr_notify_request_tracker_t*)ompi_list_get_next(trackptr)); - - if (trackptr != (mca_gpr_notify_request_tracker_t*)ompi_list_get_end(&mca_gpr_replica_notify_request_tracker)) { - /* save the requestor's tag */ - ret_value = trackptr->req_tag; - - /* ...and remove the request */ - ompi_list_remove_item(&mca_gpr_replica_notify_request_tracker, &trackptr->item); - /* put local id tag on free list */ - ptr_free_id = OBJ_NEW(mca_gpr_idtag_list_t); - ptr_free_id->id_tag = trackptr->id_tag; - ompi_list_append(&mca_gpr_replica_free_notify_id_tags, &ptr_free_id->item); - /* release tracker item */ - OBJ_RELEASE(trackptr); - - return ret_value; - } - /* error condition if reach here */ - return MCA_GPR_NOTIFY_ID_MAX; -} - -ompi_list_t *gpr_replica_test_internals(int level) 
-{ - ompi_list_t *test_results, *keylist; - ompi_registry_internal_test_results_t *result; - char name[30], name2[30]; - char *name3[30]; - int i, j, k; - mca_gpr_replica_key_t segkey, key; - mca_gpr_replica_segment_t *seg; - mca_gpr_replica_keytable_t *dict_entry; - bool success; - - - test_results = OBJ_NEW(ompi_list_t); - - ompi_output(0, "testing define segment"); - /* create several test segments */ - success = true; - result = OBJ_NEW(ompi_registry_internal_test_results_t); - result->test = strdup("test-create-segment"); - for (i=0; i<5 && success; i++) { - sprintf(name, "test-def-seg%d", i); - if (NULL == gpr_replica_define_segment(name)) { - success = false; - } - } - if (success) { - result->message = strdup("success"); - } else { - result->message = strdup("failed"); - } - ompi_list_append(test_results, &result->item); - - ompi_output(0, "testing get key for segment "); - /* check ability to get key for a segment */ - success = true; - result = OBJ_NEW(ompi_registry_internal_test_results_t); - result->test = strdup("test-get-seg-key"); - for (i=0; i<5 && success; i++) { - sprintf(name, "test-def-seg%d", i); - key = gpr_replica_get_key(name, NULL); - if (MCA_GPR_REPLICA_KEY_MAX == key) { /* got an error */ - success = false; - } - } - if (success) { - result->message = strdup("success"); - } else { - result->message = strdup("failed"); - } - ompi_list_append(test_results, &result->item); - - ompi_output(0, "testing define key"); - /* check that define key protects uniqueness */ - success = true; - result = OBJ_NEW(ompi_registry_internal_test_results_t); - result->test = strdup("test-define-key-uniqueness"); - for (i=0; i<5 && success; i++) { - sprintf(name, "test-def-seg%d", i); - segkey = gpr_replica_get_key(name, NULL); - key = gpr_replica_define_key(name, NULL); - if (segkey != key) { /* got an error */ - success = false; - } - } - if (success) { - result->message = strdup("success"); - } else { - result->message = strdup("failed"); - } - 
ompi_list_append(test_results, &result->item); - - ompi_output(0, "testing find segment"); - /* check the ability to find a segment */ - i = 2; - sprintf(name, "test-def-seg%d", i); - result = OBJ_NEW(ompi_registry_internal_test_results_t); - result->test = strdup("test-find-seg"); - seg = gpr_replica_find_seg(false, name); - if (NULL == seg) { - asprintf(&result->message, "test failed with NULL returned: %s", name); - } else { /* locate key and check it */ - segkey = gpr_replica_get_key(name, NULL); - if (segkey == seg->segment) { - result->message = strdup("success"); - } else { - asprintf(&result->message, "test failed: key %d seg %d", segkey, seg->segment); - } - } - ompi_list_append(test_results, &result->item); - - ompi_output(0, "testing define key within segment"); - /* check ability to define key within a segment */ - success = true; - result = OBJ_NEW(ompi_registry_internal_test_results_t); - result->test = strdup("test-define-key-segment"); - for (i=0; i<5 && success; i++) { - sprintf(name, "test-def-seg%d", i); - for (j=0; j<10 && success; j++) { - sprintf(name2, "test-key%d", j); - k = gpr_replica_define_key(name, name2); - if (0 > k) { /* got an error */ - success = false; - } - } - } - if (success) { - result->message = strdup("success"); - } else { - result->message = strdup("failed"); - } - ompi_list_append(test_results, &result->item); - - - ompi_output(0, "testing get key within segment"); - /* check ability to retrieve key within a segment */ - success = true; - result = OBJ_NEW(ompi_registry_internal_test_results_t); - result->test = strdup("test-get-key-segment"); - for (i=0; i<5 && success; i++) { - sprintf(name, "test-def-seg%d", i); - for (j=0; j<10 && success; j++) { - sprintf(name2, "test-key%d", j); - key = gpr_replica_get_key(name, name2); - if (MCA_GPR_REPLICA_KEY_MAX == key) { /* got an error */ - success = false; - } - } - } - if (success) { - result->message = strdup("success"); - } else { - result->message = strdup("failed"); - } - 
ompi_list_append(test_results, &result->item); - - - ompi_output(0, "testing get dict entry - global"); - /* check ability to get dictionary entries */ - success = true; - result = OBJ_NEW(ompi_registry_internal_test_results_t); - result->test = strdup("test-get-dict-entry"); - /* first check ability to get segment values */ - for (i=0; i<5 && success; i++) { - sprintf(name, "test-def-seg%d", i); - dict_entry = gpr_replica_find_dict_entry(name, NULL); - if (NULL == dict_entry) { /* got an error */ - success = false; - } - } - if (success) { - result->message = strdup("success"); - } else { - result->message = strdup("failed"); - } - ompi_list_append(test_results, &result->item); - - ompi_output(0, "testing get dict entry - segment"); - if (success) { /* segment values checked out - move on to within a segment */ - result = OBJ_NEW(ompi_registry_internal_test_results_t); - result->test = strdup("test-get-dict-entry-segment"); - for (i=0; i<5; i++) { - sprintf(name, "test-def-seg%d", i); - for (j=0; j<10; j++) { - sprintf(name2, "test-key%d", j); - dict_entry = gpr_replica_find_dict_entry(name, NULL); - if (NULL == dict_entry) { /* got an error */ - success = false; - } - } - } - if (success) { - result->message = strdup("success"); - } else { - result->message = strdup("failed"); - } - ompi_list_append(test_results, &result->item); - } - - - ompi_output(0, "testing get key list"); - /* check ability to get key list */ - success = true; - result = OBJ_NEW(ompi_registry_internal_test_results_t); - result->test = strdup("test-get-keylist"); - for (i=0; i<5 && success; i++) { - sprintf(name, "test-def-seg%d", i); - for (j=0; j<10 && success; j++) { - asprintf(&name3[j], "test-key%d", j); - } - name3[j] = NULL; - keylist = gpr_replica_get_key_list(name, name3); - if (0 >= ompi_list_get_size(keylist)) { /* error condition */ - success = false; - } - } - if (success) { - result->message = strdup("success"); - } else { - result->message = strdup("failed"); - } - 
ompi_list_append(test_results, &result->item); - - /* check ability to empty segment */ - - - return test_results; -} diff --git a/src/mca/gpr/replica/gpr_replica_internals.h b/src/mca/gpr/replica/gpr_replica_internals.h index 7cb9ce6cc3..ae48b36e06 100644 --- a/src/mca/gpr/replica/gpr_replica_internals.h +++ b/src/mca/gpr/replica/gpr_replica_internals.h @@ -7,18 +7,21 @@ * */ +#ifndef MCA_GPR_REPLICA_INTERNALS_H_ +#define MCA_GPR_REPLICA_INTERNALS_H_ + /** Retrieve a registry key value for a given token string. * The ompi_registry_getkey() function is used to translate a token string for a particular * segment of the registry into its associated (integer) key value. * - * @param segment Pointer to a character string defining the segment of the registry being queried. + * @param seg Pointer to the segment of the registry being queried. * @param token Pointer to a character string containing the token to be translated. If token=NULL, * the function returns the key value corresponding to the specified segment itself. * * @retval key Unsigned long integer value corresponding to the specified token within the specified segment. * @retval -1 Indicates that the segment and/or token could not be found. */ -mca_gpr_replica_key_t gpr_replica_get_key(char *segment, char *token); +mca_gpr_replica_key_t mca_gpr_replica_get_key(mca_gpr_replica_segment_t *seg, char *token); /** Add a token to a segment's dictionary. * The gpr_replica_define_key() function allows the addition of a new definition to @@ -27,13 +30,13 @@ mca_gpr_replica_key_t gpr_replica_get_key(char *segment, char *token); * dictionary. * * @param segment Pointer to a character string defining the segment of the registry being queried. - * @param token Pointer to a character string containing the token to be defined. If token=NULL, + * @param token Pointer to a character string containing the token to be defined. 
If segment=NULL, * the function adds the token to the segment dictionary, thus defining a new segment name. * * @retval key New key value * @retval MCA_GPR_REPLICA_KEY_MAX Indicates that the dictionary is full or some other error. */ -mca_gpr_replica_key_t gpr_replica_define_key(char *segment, char *token); +mca_gpr_replica_key_t mca_gpr_replica_define_key(mca_gpr_replica_segment_t *seg, char *token); /** Delete a token from a segment's dictionary. * The gpr_replica_deletekey() function allows the removal of a definition from the @@ -51,7 +54,7 @@ mca_gpr_replica_key_t gpr_replica_define_key(char *segment, char *token); * @retval OMPI_ERROR Indicates that the operation failed - most likely caused by specifying * a token that did not exist within the specified segment, or a non-existent segment. */ -int gpr_replica_delete_key(char *segment, char *token); +int mca_gpr_replica_delete_key(mca_gpr_replica_segment_t *seg, char *token); /** Find a requested registry segment. * The gpr_replica_findseq() function finds the registry segment corresponding to @@ -62,45 +65,74 @@ int gpr_replica_delete_key(char *segment, char *token); * @retval *seg Pointer to the segment * @retval NULL Indicates that the specified segment could not be found */ -mca_gpr_replica_segment_t *gpr_replica_find_seg(bool create, char *segment); +mca_gpr_replica_segment_t *mca_gpr_replica_find_seg(bool create, char *segment, + mca_ns_base_jobid_t jobid); -mca_gpr_replica_keytable_t *gpr_replica_find_dict_entry(char *segment, char *token); +mca_gpr_replica_keytable_t +*mca_gpr_replica_find_dict_entry(mca_gpr_replica_segment_t *seg, char *token); -int gpr_replica_empty_segment(mca_gpr_replica_segment_t *seg); +int mca_gpr_replica_empty_segment(mca_gpr_replica_segment_t *seg); -ompi_list_t *gpr_replica_get_key_list(char *segment, char **tokens); +mca_gpr_replica_key_t +*mca_gpr_replica_get_key_list(mca_gpr_replica_segment_t *seg, char **tokens, + int *num_tokens); -bool 
gpr_replica_check_key_list(ompi_registry_mode_t mode, - mca_gpr_replica_key_t num_keys_search, mca_gpr_replica_key_t *keys, - mca_gpr_replica_key_t num_keys_entry, mca_gpr_replica_key_t *entry_keys); +bool mca_gpr_replica_check_key_list(ompi_registry_mode_t mode, + mca_gpr_replica_key_t num_keys_search, + mca_gpr_replica_key_t *keys, + mca_gpr_replica_key_t num_keys_entry, + mca_gpr_replica_key_t *entry_keys); -mca_gpr_replica_segment_t *gpr_replica_define_segment(char *segment); +mca_gpr_replica_segment_t *mca_gpr_replica_define_segment(char *segment, + mca_ns_base_jobid_t jobid); -mca_gpr_replica_trigger_list_t *gpr_replica_construct_trigger(ompi_registry_synchro_mode_t synchro_mode, - ompi_registry_notify_action_t action, - ompi_registry_mode_t addr_mode, - char *segment, char **tokens, int trigger, - mca_gpr_notify_id_t id_tag); +mca_gpr_replica_trigger_list_t +*mca_gpr_replica_construct_trigger(ompi_registry_synchro_mode_t synchro_mode, + ompi_registry_notify_action_t action, + ompi_registry_mode_t addr_mode, + mca_gpr_replica_segment_t *seg, + mca_gpr_replica_key_t *keys, + int num_keys, + int trigger, + ompi_registry_notify_id_t id_tag); -ompi_registry_notify_message_t *gpr_replica_construct_notify_message(ompi_registry_mode_t addr_mode, - char *segment, char **tokens); +ompi_registry_notify_message_t +*mca_gpr_replica_construct_notify_message(mca_gpr_replica_segment_t *seg, + mca_gpr_replica_trigger_list_t *trig); -bool gpr_replica_process_triggers(char *segment, - mca_gpr_replica_trigger_list_t *trig, - ompi_registry_notify_message_t *message); +bool mca_gpr_replica_process_triggers(mca_gpr_replica_segment_t *seg, + mca_gpr_replica_trigger_list_t *trig, + ompi_registry_notify_message_t *message); -mca_gpr_notify_id_t gpr_replica_remove_trigger(ompi_registry_synchro_mode_t synchro_mode, - ompi_registry_notify_action_t action, - ompi_registry_mode_t addr_mode, - char *segment, char **tokens, int trigger); +ompi_registry_notify_id_t 
+mca_gpr_replica_remove_trigger(ompi_registry_notify_id_t idtag); -char *gpr_replica_get_token(char *segment, mca_gpr_replica_key_t key); +char *mca_gpr_replica_get_token(mca_gpr_replica_segment_t *seg, mca_gpr_replica_key_t key); -mca_gpr_notify_id_t gpr_replica_enter_notify_request(ompi_process_name_t *requestor, - mca_gpr_notify_id_t idtag, - ompi_registry_notify_cb_fn_t cb_func, - void *user_tag); +ompi_registry_notify_id_t +mca_gpr_replica_enter_notify_request(mca_gpr_replica_segment_t *seg, + ompi_registry_notify_action_t action, + ompi_process_name_t *requestor, + ompi_registry_notify_id_t idtag, + ompi_registry_notify_cb_fn_t cb_func, + void *user_tag); -mca_gpr_notify_id_t gpr_replica_remove_notify_request(mca_gpr_notify_id_t idtag); +ompi_registry_notify_id_t +mca_gpr_replica_remove_notify_request(ompi_registry_notify_id_t idtag); -void gpr_replica_process_callbacks(void); +void mca_gpr_replica_process_callbacks(void); + +int mca_gpr_replica_check_synchros(mca_gpr_replica_segment_t *seg); + +void mca_gpr_replica_check_subscriptions(mca_gpr_replica_segment_t *seg, int8_t action_taken); + +int mca_gpr_replica_purge_subscriptions(ompi_process_name_t *proc); + +ompi_buffer_t +mca_gpr_replica_process_command_buffer(ompi_buffer_t buffer, + ompi_process_name_t *sender, + bool *return_requested, + bool *compound_cmd_detected); + + +#endif
diff --git a/src/mca/gpr/replica/gpr_replica_internals_dict_ops.c b/src/mca/gpr/replica/gpr_replica_internals_dict_ops.c
new file mode 100644
index 0000000000..25053dfed8
--- /dev/null
+++ b/src/mca/gpr/replica/gpr_replica_internals_dict_ops.c
@@ -0,0 +1,449 @@
+/*
+ * $HEADER$
+ */
+/** @file:
+ *
+ * The Open MPI general purpose registry - support functions.
+ *
+ */
+
+/*
+ * includes
+ */
+
+#include "ompi_config.h"
+
+#include "gpr_replica.h"
+#include "gpr_replica_internals.h"
+
+/**
+ * Look up a dictionary entry.
+ *
+ * With token == NULL the global segment dictionary is searched for the
+ * entry whose key equals seg->key; otherwise seg's own keytable is
+ * searched for an entry whose token string matches.
+ *
+ * @param seg   Segment to search.
+ * @param token Token to look up, or NULL for the entry of seg itself.
+ *
+ * @retval entry Pointer to the matching dictionary entry (not a copy).
+ * @retval NULL  No matching entry, or seg was NULL.
+ */
+mca_gpr_replica_keytable_t
+*mca_gpr_replica_find_dict_entry(mca_gpr_replica_segment_t *seg, char *token)
+{
+    mca_gpr_replica_keytable_t *ptr_key;
+
+    /* both search paths dereference seg, so reject NULL up front */
+    if (NULL == seg) {
+        return NULL;
+    }
+
+    if (NULL == token) { /* just want segment token-key pair */
+        /* search the global-level dict to find entry */
+        for (ptr_key = (mca_gpr_replica_keytable_t*)ompi_list_get_first(&mca_gpr_replica_head.segment_dict);
+             ptr_key != (mca_gpr_replica_keytable_t*)ompi_list_get_end(&mca_gpr_replica_head.segment_dict);
+             ptr_key = (mca_gpr_replica_keytable_t*)ompi_list_get_next(ptr_key)) {
+            if (seg->key == ptr_key->key) {
+                return(ptr_key);
+            }
+        }
+        return NULL; /* couldn't find the entry */
+    }
+
+    /* want specified token-key pair in that segment's dictionary */
+    for (ptr_key = (mca_gpr_replica_keytable_t*)ompi_list_get_first(&seg->keytable);
+         ptr_key != (mca_gpr_replica_keytable_t*)ompi_list_get_end(&seg->keytable);
+         ptr_key = (mca_gpr_replica_keytable_t*)ompi_list_get_next(ptr_key)) {
+        if (0 == strcmp(token, ptr_key->token)) {
+            return(ptr_key);
+        }
+    }
+    return(NULL); /* couldn't find the specified entry */
+}
+
+
+/**
+ * Translate a token into its key.
+ *
+ * @param seg   Segment whose dictionary is consulted.
+ * @param token Token to translate; NULL asks for the key of seg itself.
+ *
+ * @retval key                     Key stored in the matching entry.
+ * @retval MCA_GPR_REPLICA_KEY_MAX No dictionary entry was found.
+ */
+mca_gpr_replica_key_t mca_gpr_replica_get_key(mca_gpr_replica_segment_t *seg, char *token)
+{
+    mca_gpr_replica_keytable_t *ptr_key;
+
+    /* find the dictionary entry that matches token */
+    ptr_key = mca_gpr_replica_find_dict_entry(seg, token);
+    if (NULL != ptr_key) {
+        return(ptr_key->key);
+    }
+    return MCA_GPR_REPLICA_KEY_MAX; /* couldn't find dictionary entry */
+}
+
+
+/**
+ * Translate a key back into its token string.
+ *
+ * @param seg Segment whose keytable is searched; with NULL the global
+ *            segment dictionary is searched instead.
+ * @param key Key to look up.
+ *
+ * @retval token strdup()'d copy of the token; the caller must free() it.
+ * @retval NULL  No entry carries that key (or strdup() failed).
+ */
+char *mca_gpr_replica_get_token(mca_gpr_replica_segment_t *seg, mca_gpr_replica_key_t key)
+{
+    mca_gpr_replica_keytable_t *ptr_key;
+    char *answer;
+
+    if (NULL == seg) {
+        /* want to find a matching token for a segment name */
+        for (ptr_key = (mca_gpr_replica_keytable_t*)ompi_list_get_first(&mca_gpr_replica_head.segment_dict);
+             ptr_key != (mca_gpr_replica_keytable_t*)ompi_list_get_end(&mca_gpr_replica_head.segment_dict);
+             ptr_key = (mca_gpr_replica_keytable_t*)ompi_list_get_next(ptr_key)) {
+            if (key == ptr_key->key) {
+                answer = strdup(ptr_key->token);
+                return answer;
+            }
+        }
+        return NULL; /* couldn't find the specified entry */
+    }
+
+    /* find the matching key */
+    for (ptr_key = (mca_gpr_replica_keytable_t*)ompi_list_get_first(&seg->keytable);
+         ptr_key != (mca_gpr_replica_keytable_t*)ompi_list_get_end(&seg->keytable);
+         ptr_key = (mca_gpr_replica_keytable_t*)ompi_list_get_next(ptr_key)) {
+        if (key == ptr_key->key) {
+            answer = strdup(ptr_key->token);
+            return answer;
+        }
+    }
+    return(NULL); /* couldn't find the specified entry */
+}
+
+/**
+ * Convert a NULL-terminated token array into an array of keys.
+ *
+ * Tokens that are not yet in the dictionary are defined on the fly, so
+ * this call can add dictionary entries.
+ *
+ * @param seg        Segment whose dictionary supplies/receives the keys.
+ * @param tokens     NULL-terminated array of token strings, or NULL.
+ * @param num_tokens Out: number of keys in the returned array.
+ *
+ * @retval keys malloc()'d array of *num_tokens keys; the caller frees it.
+ * @retval NULL tokens was NULL (wildcard case); *num_tokens is 0.
+ *
+ * NOTE(review): the malloc() result is not checked. Returning NULL on
+ * failure would be indistinguishable from the wildcard case, so fixing
+ * that needs an interface decision rather than a local patch.
+ */
+mca_gpr_replica_key_t
+*mca_gpr_replica_get_key_list(mca_gpr_replica_segment_t *seg,
+                              char **tokens, int *num_tokens)
+{
+    char **tokptr;
+    mca_gpr_replica_key_t *keys, *key2;
+    int num_keys;
+
+    *num_tokens = 0;
+
+    /* check for wild-card case */
+    if (NULL == tokens) {
+        return NULL;
+    }
+
+    tokptr = tokens;
+    num_keys = 0;
+    while (NULL != *tokptr) {
+        num_keys++;
+        tokptr++;
+    }
+
+    keys = (mca_gpr_replica_key_t*)malloc(num_keys*sizeof(mca_gpr_replica_key_t));
+    key2 = keys;
+    *num_tokens = num_keys;
+
+    tokptr = tokens;
+
+    while (NULL != *tokptr) { /* traverse array of tokens until NULL */
+        *key2 = mca_gpr_replica_get_key(seg, *tokptr);
+        if (MCA_GPR_REPLICA_KEY_MAX == *key2) {
+            *key2 = mca_gpr_replica_define_key(seg, *tokptr);
+        }
+        tokptr++; key2++;
+    }
+    return keys;
+}
+
+/**
+ * Define a token in a dictionary and return its key.
+ *
+ * With seg == NULL the token names a segment and is stored in the global
+ * segment dictionary; otherwise it is stored in seg's keytable. A token
+ * that already exists keeps its current key and nothing is added. A key
+ * waiting on the matching freekeys list is reused before lastkey is
+ * incremented to mint a new one.
+ *
+ * @param seg   Target segment, or NULL for the global segment dictionary.
+ * @param token Token to define.
+ *
+ * @retval key                     Key now associated with token.
+ * @retval MCA_GPR_REPLICA_KEY_MAX token was NULL, or the new dictionary
+ *                                 entry could not be allocated.
+ */
+mca_gpr_replica_key_t
+mca_gpr_replica_define_key(mca_gpr_replica_segment_t *seg, char *token)
+{
+    mca_gpr_replica_keytable_t *ptr_key, *new;
+
+    /* if token is NULL, error */
+    if (NULL == token) {
+        return MCA_GPR_REPLICA_KEY_MAX;
+    }
+
+    /* if seg is NULL, use token to define new segment name in global dictionary */
+    if (NULL == seg) {
+        for (ptr_key = (mca_gpr_replica_keytable_t*)ompi_list_get_first(&mca_gpr_replica_head.segment_dict);
+             ptr_key != (mca_gpr_replica_keytable_t*)ompi_list_get_end(&mca_gpr_replica_head.segment_dict);
+             ptr_key = (mca_gpr_replica_keytable_t*)ompi_list_get_next(ptr_key)) {
+            if (0 == strcmp(token, ptr_key->token)) {
+                return ptr_key->key; /* already taken, report value */
+            }
+        }
+        /* okay, token is unique - create dictionary entry */
+        new = OBJ_NEW(mca_gpr_replica_keytable_t);
+        if (NULL == new) {
+            return MCA_GPR_REPLICA_KEY_MAX;
+        }
+        new->token = strdup(token);
+        if (NULL == new->token) { /* later lookups strcmp() against token */
+            OBJ_RELEASE(new);
+            return MCA_GPR_REPLICA_KEY_MAX;
+        }
+        if (0 == ompi_list_get_size(&mca_gpr_replica_head.freekeys)) { /* no keys waiting for reuse */
+            mca_gpr_replica_head.lastkey++;
+            new->key = mca_gpr_replica_head.lastkey;
+        } else {
+            ptr_key = (mca_gpr_replica_keytable_t*)ompi_list_remove_first(&mca_gpr_replica_head.freekeys);
+            new->key = ptr_key->key;
+            OBJ_RELEASE(ptr_key); /* placeholder is off the list - don't leak it */
+        }
+        ompi_list_append(&mca_gpr_replica_head.segment_dict, &new->item);
+        return new->key;
+    }
+
+    /* check seg's dictionary to ensure uniqueness */
+    for (ptr_key = (mca_gpr_replica_keytable_t*)ompi_list_get_first(&seg->keytable);
+         ptr_key != (mca_gpr_replica_keytable_t*)ompi_list_get_end(&seg->keytable);
+         ptr_key = (mca_gpr_replica_keytable_t*)ompi_list_get_next(ptr_key)) {
+        if (0 == strcmp(token, ptr_key->token)) {
+            return ptr_key->key; /* already taken, report value */
+        }
+    }
+
+    /* okay, token is unique - create dictionary entry */
+    new = OBJ_NEW(mca_gpr_replica_keytable_t);
+    if (NULL == new) {
+        return MCA_GPR_REPLICA_KEY_MAX;
+    }
+    new->token = strdup(token);
+    if (NULL == new->token) { /* later lookups strcmp() against token */
+        OBJ_RELEASE(new);
+        return MCA_GPR_REPLICA_KEY_MAX;
+    }
+    if (0 == ompi_list_get_size(&seg->freekeys)) { /* no keys waiting for reuse */
+        seg->lastkey++;
+        new->key = seg->lastkey;
+    } else {
+        ptr_key = (mca_gpr_replica_keytable_t*)ompi_list_remove_first(&seg->freekeys);
+        new->key = ptr_key->key;
+        OBJ_RELEASE(ptr_key); /* placeholder is off the list - don't leak it */
+    }
+    ompi_list_append(&seg->keytable, &new->item);
+    return new->key;
+}
+
+
+/**
+ * Remove a token (or the segment's own name) from its dictionary.
+ *
+ * With token == NULL the entry for seg is removed from the global segment
+ * dictionary. Otherwise every occurrence of the token's key in seg's
+ * registry entries is overwritten with MCA_GPR_REPLICA_KEY_MAX and the
+ * token is removed from seg's keytable. Either way the freed key is queued
+ * on the matching freekeys list for reuse.
+ *
+ * @param seg   Segment to operate on.
+ * @param token Token to delete, or NULL to delete the entry of seg itself.
+ *
+ * @retval OMPI_SUCCESS The dictionary entry was removed.
+ * @retval OMPI_ERROR   seg was NULL, no matching entry exists, or the
+ *                      free-key placeholder could not be allocated.
+ */
+int mca_gpr_replica_delete_key(mca_gpr_replica_segment_t *seg, char *token)
+{
+    mca_gpr_replica_core_t *reg;
+    mca_gpr_replica_keytable_t *ptr_key, *new;
+    mca_gpr_replica_key_t *key;
+    unsigned int i;
+
+    if (NULL == seg) {
+        return OMPI_ERROR;
+    }
+
+    /* a NULL token selects seg's own entry in the global dictionary */
+    ptr_key = mca_gpr_replica_find_dict_entry(seg, token);
+    if (NULL == ptr_key) { /* failed to find dictionary entry */
+        return OMPI_ERROR;
+    }
+
+    /* build the free-key placeholder first so that a failed allocation
+     * leaves the dictionaries untouched */
+    new = OBJ_NEW(mca_gpr_replica_keytable_t);
+    if (NULL == new) {
+        return OMPI_ERROR;
+    }
+    new->token = NULL;
+    new->key = ptr_key->key;
+
+    if (NULL == token) {
+        /* add key to global registry's freekey list */
+        ompi_list_append(&mca_gpr_replica_head.freekeys, &new->item);
+
+        /* remove the dictionary entry and release it */
+        ompi_list_remove_item(&mca_gpr_replica_head.segment_dict, &ptr_key->item);
+        OBJ_RELEASE(ptr_key);
+
+        return(OMPI_SUCCESS);
+    }
+
+    /* search this segment's registry to find all instances of key & "delete" them */
+    for (reg = (mca_gpr_replica_core_t*)ompi_list_get_first(&seg->registry_entries);
+         reg != (mca_gpr_replica_core_t*)ompi_list_get_end(&seg->registry_entries);
+         reg = (mca_gpr_replica_core_t*)ompi_list_get_next(reg)) {
+
+        /* check the key list */
+        for (i=0, key=reg->keys; i < reg->num_keys; i++, key++) {
+            if (ptr_key->key == *key) { /* found match */
+                *key = MCA_GPR_REPLICA_KEY_MAX;
+            }
+        }
+    }
+
+    /* the bookkeeping below must run exactly once, after every entry has
+     * been scanned - also when the segment holds no entries at all */
+
+    /* add key to this segment's freekey list */
+    ompi_list_append(&seg->freekeys, &new->item);
+
+    /* now remove the dictionary entry from the segment's dictionary */
+    ompi_list_remove_item(&seg->keytable, &ptr_key->item);
+    OBJ_RELEASE(ptr_key);
+
+    return(OMPI_SUCCESS);
+}
+
+
+/**
+ * Decide whether a registry entry's keys satisfy a search key list.
+ *
+ * A NULL search list is a wildcard and always matches. Otherwise length
+ * pre-checks reject XAND when the two counts differ and AND when there
+ * are more search keys than entry keys; the pairwise scan then resolves
+ * the mode bits in this order:
+ *  - OR:   true as soon as any entry key equals any search key.
+ *  - XAND: the number of matching pairs equals num_keys_entry.
+ *  - XOR:  at least one match, and every entry key matched a search key.
+ *  - AND:  the number of matching pairs equals num_keys_search.
+ * A mode of "NONE" or "OVERWRITE" defaults to "XAND" behavior.
+ *
+ * @param addr_mode       Addressing mode selecting the rule above.
+ * @param num_keys_search Number of keys in keys.
+ * @param keys            Search keys, or NULL for the wildcard case.
+ * @param num_keys_entry  Number of keys in entry_keys.
+ * @param entry_keys      Keys attached to the registry entry.
+ *
+ * @retval true  The entry satisfies the search.
+ * @retval false It does not, or no mode bit was recognized.
+ */
+bool mca_gpr_replica_check_key_list(ompi_registry_mode_t addr_mode,
+                                    mca_gpr_replica_key_t num_keys_search, mca_gpr_replica_key_t *keys,
+                                    mca_gpr_replica_key_t num_keys_entry, mca_gpr_replica_key_t *entry_keys)
+{
+    mca_gpr_replica_key_t *key1, *key2;
+    unsigned int num_found;
+    bool exclusive, no_match;
+    unsigned int i, j;
+
+    /* check for trivial case */
+    if (NULL == keys) { /* wildcard case - automatically true */
+        return true;
+    }
+
+    if (OMPI_REGISTRY_NONE == addr_mode ||
+        OMPI_REGISTRY_OVERWRITE == addr_mode) { /* set default behavior for search */
+        addr_mode = OMPI_REGISTRY_XAND;
+    }
+
+    /* take care of trivial cases that don't require search */
+    if ((OMPI_REGISTRY_XAND & addr_mode) &&
+        (num_keys_search != num_keys_entry)) { /* can't possibly turn out "true" */
+        return false;
+    }
+
+    if ((OMPI_REGISTRY_AND & addr_mode) &&
+        (num_keys_search > num_keys_entry)) { /* can't find enough matches */
+        return false;
+    }
+
+    /* okay, have to search for remaining possibilities */
+    num_found = 0;
+    exclusive = true;
+    for (i=0, key1=entry_keys; i < num_keys_entry; i++, key1++) {
+        no_match = true;
+        for (j=0, key2=keys; j < num_keys_search; j++, key2++) {
+            if (*key1 == *key2) { /* found a match */
+                num_found++;
+                no_match = false;
+                if (OMPI_REGISTRY_OR & addr_mode) { /* only need one match */
+                    return true;
+                }
+            }
+        }
+        if (no_match) {
+            exclusive = false;
+        }
+    }
+
+    if (OMPI_REGISTRY_XAND & addr_mode) { /* deal with XAND case */
+        if (num_found == num_keys_entry) { /* found all, and nothing more */
+            return true;
+        } else { /* found either too many or not enough */
+            return false;
+        }
+    }
+
+    if (OMPI_REGISTRY_XOR & addr_mode) { /* deal with XOR case */
+        if (num_found > 0 && exclusive) { /* found at least one and nothing not on list */
+            return true;
+        } else {
+            return false;
+        }
+    }
+
+    if (OMPI_REGISTRY_AND & addr_mode) { /* deal with AND case */
+        if (num_found == num_keys_search) { /* found all the required keys */
+            return true;
+        } else {
+            return false;
+        }
+    }
+
+    /* should be impossible situation, but just to be safe... */
+    return false;
+}
diff --git a/src/mca/gpr/replica/gpr_replica_internals_segment_ops.c b/src/mca/gpr/replica/gpr_replica_internals_segment_ops.c
new file mode 100644
index 0000000000..7028383470
--- /dev/null
+++ b/src/mca/gpr/replica/gpr_replica_internals_segment_ops.c
@@ -0,0 +1,176 @@
+/*
+ * $HEADER$
+ */
+/** @file:
+ *
+ * The Open MPI general purpose registry - support functions.
+ *
+ */
+
+/*
+ * includes
+ */
+
+#include "ompi_config.h"
+
+#include "gpr_replica.h"
+#include "gpr_replica_internals.h"
+
+/**
+ * Create a registry segment and append it to the global registry list.
+ *
+ * The segment name is first defined in the global segment dictionary; the
+ * key returned for it becomes the segment's key.
+ *
+ * @param segment Name of the segment to create.
+ * @param jobid   Job recorded as the segment's owning_job.
+ *
+ * @retval seg  Pointer to the new segment.
+ * @retval NULL segment was NULL, no key could be obtained for the name,
+ *              or the segment object could not be allocated.
+ */
+mca_gpr_replica_segment_t *mca_gpr_replica_define_segment(char *segment,
+                                                          mca_ns_base_jobid_t jobid)
+{
+    mca_gpr_replica_segment_t *seg;
+    mca_gpr_replica_key_t key;
+
+    /* reject NULL before the name reaches the "%s" in the debug output */
+    if (NULL == segment) {
+        return NULL;
+    }
+
+    if (mca_gpr_replica_debug) {
+        ompi_output(0, "[%d,%d,%d] define_segment: name %s jobid %d",
+                    OMPI_NAME_ARGS(*ompi_rte_get_self()), segment, (int)jobid);
+    }
+
+    key = mca_gpr_replica_define_key(NULL, segment);
+    if (MCA_GPR_REPLICA_KEY_MAX == key) { /* got some kind of error code */
+        return NULL;
+    }
+
+    /* need to add the segment to the registry */
+    seg = OBJ_NEW(mca_gpr_replica_segment_t);
+    if (NULL == seg) {
+        return NULL;
+    }
+    seg->name = strdup(segment);
+    seg->key = key;
+    seg->owning_job = jobid;
+    seg->triggers_active = false;
+    ompi_list_append(&mca_gpr_replica_head.registry, &seg->item);
+
+
+    return seg;
+}
+
+
+/**
+ * Find a segment by name, optionally creating it.
+ *
+ * The name is looked up in the global segment dictionary and the segment
+ * carrying the matching key is then searched for in the registry list.
+ *
+ * @param create  If true, a segment that cannot be found is created via
+ *                mca_gpr_replica_define_segment().
+ * @param segment Name of the segment.
+ * @param jobid   Owning job handed to define_segment when creating.
+ *
+ * @retval seg  Pointer to the existing or newly created segment.
+ * @retval NULL segment was NULL, or the segment was not found and was
+ *              not (or could not be) created.
+ */
+mca_gpr_replica_segment_t *mca_gpr_replica_find_seg(bool create, char *segment,
+                                                    mca_ns_base_jobid_t jobid)
+{
+    mca_gpr_replica_keytable_t *ptr_seg;
+    mca_gpr_replica_segment_t *seg;
+
+    /* strcmp() below must never see a NULL name */
+    if (NULL == segment) {
+        return NULL;
+    }
+
+    /* search the registry segments to find which one is being referenced */
+    for (ptr_seg = (mca_gpr_replica_keytable_t*)ompi_list_get_first(&mca_gpr_replica_head.segment_dict);
+         ptr_seg != (mca_gpr_replica_keytable_t*)ompi_list_get_end(&mca_gpr_replica_head.segment_dict);
+         ptr_seg = (mca_gpr_replica_keytable_t*)ompi_list_get_next(ptr_seg)) {
+        if (0 == strcmp(segment, ptr_seg->token)) {
+            /* search mca_gpr_replica_head to find segment */
+            for (seg=(mca_gpr_replica_segment_t*)ompi_list_get_first(&mca_gpr_replica_head.registry);
+                 seg != (mca_gpr_replica_segment_t*)ompi_list_get_end(&mca_gpr_replica_head.registry);
+                 seg = (mca_gpr_replica_segment_t*)ompi_list_get_next(seg)) {
+                if(seg->key == ptr_seg->key) {
+                    return(seg);
+                }
+            }
+        }
+    }
+
+
+    if (create) {
+        /* didn't find the dictionary entry - create it */
+        return mca_gpr_replica_define_segment(segment, jobid);
+    }
+    return NULL; /* don't create it - just return NULL */
+}
+
+/**
+ * Release everything a segment holds and remove it from the registry.
+ *
+ * Registry entries, dictionary entries, free keys and triggers are
+ * released in turn, then the segment is taken off the global registry
+ * list and released itself, so seg must not be used afterwards.
+ *
+ * NOTE(review): the segment's entry in the global segment dictionary is
+ * not touched here, and subscriptions/synchros are not purged from the
+ * notify tracker - presumably the caller handles both; confirm.
+ *
+ * @param seg Segment to destroy.
+ *
+ * @retval OMPI_SUCCESS The segment was emptied and released.
+ * @retval OMPI_ERROR   seg was NULL.
+ */
+int mca_gpr_replica_empty_segment(mca_gpr_replica_segment_t *seg)
+{
+    mca_gpr_replica_core_t *ptr;
+    mca_gpr_replica_keytable_t *keytab;
+    mca_gpr_replica_keylist_t *keylst;
+    mca_gpr_replica_trigger_list_t *trig;
+
+    if (NULL == seg) {
+        return OMPI_ERROR;
+    }
+
+    /* need to free memory from each entry - remove_first returns pointer to the entry */
+    /* need to purge all subscriptions/synchros from notify tracker, and delete from segment */
+
+    /* empty the segment's registry */
+    while (NULL != (ptr = (mca_gpr_replica_core_t*)ompi_list_remove_first(&seg->registry_entries))) {
+        OBJ_RELEASE(ptr);
+    }
+
+    /* empty the segment's dictionary */
+    while (NULL != (keytab = (mca_gpr_replica_keytable_t*)ompi_list_remove_first(&seg->keytable))) {
+        OBJ_RELEASE(keytab);
+    }
+
+    /* empty the list of free keys */
+    while (NULL != (keylst = (mca_gpr_replica_keylist_t*)ompi_list_remove_first(&seg->freekeys))) {
+        OBJ_RELEASE(keylst);
+    }
+
+    /* empty the list of triggers */
+    while (NULL != (trig = (mca_gpr_replica_trigger_list_t*)ompi_list_remove_first(&seg->triggers))) {
+        OBJ_RELEASE(trig);
+    }
+
+    /* now remove segment from global registry */
+    ompi_list_remove_item(&mca_gpr_replica_head.registry, &seg->item);
+    OBJ_RELEASE(seg);
+
+
+    return OMPI_SUCCESS;
+}
+
diff --git a/src/mca/gpr/replica/gpr_replica_internals_trigger_ops.c b/src/mca/gpr/replica/gpr_replica_internals_trigger_ops.c
new file mode 100644
index 0000000000..08deb64cbe
--- /dev/null
+++ b/src/mca/gpr/replica/gpr_replica_internals_trigger_ops.c
@@ -0,0 +1,432 @@
+/*
+
* $HEADER$ + */ +/** @file: + * + * The Open MPI general purpose registry - support functions. + * + */ + +/* + * includes + */ + +#include "ompi_config.h" + +#include "gpr_replica.h" +#include "gpr_replica_internals.h" + +mca_gpr_replica_trigger_list_t* +mca_gpr_replica_construct_trigger(ompi_registry_synchro_mode_t synchro_mode, + ompi_registry_notify_action_t action, + ompi_registry_mode_t addr_mode, + mca_gpr_replica_segment_t *seg, + mca_gpr_replica_key_t *keys, + int num_keys, + int trigger, + ompi_registry_notify_id_t id_tag) +{ + mca_gpr_replica_core_t *reg; + mca_gpr_replica_trigger_list_t *trig; + mca_gpr_replica_key_t *key2, *keyptr; + int i; + + + trig = OBJ_NEW(mca_gpr_replica_trigger_list_t); + + trig->synch_mode = synchro_mode; + trig->action = action; + trig->addr_mode = addr_mode; + trig->trigger = trigger; + trig->count = 0; + trig->local_idtag = id_tag; + + trig->num_keys = num_keys; + if (0 < num_keys) { + trig->keys = (mca_gpr_replica_key_t*)malloc(num_keys*sizeof(mca_gpr_replica_key_t)); + keyptr = trig->keys; + key2 = keys; + for (i=0; i < num_keys; i++) { + *keyptr = *key2; + keyptr++; key2++; + } + } else { + trig->keys = NULL; + } + + if (OMPI_REGISTRY_SYNCHRO_MODE_NONE != synchro_mode) { /* this is a synchro, so initialize the count */ + /* traverse segment entries and initialize trigger count */ + for (reg = (mca_gpr_replica_core_t*)ompi_list_get_first(&seg->registry_entries); + reg != (mca_gpr_replica_core_t*)ompi_list_get_end(&seg->registry_entries); + reg = (mca_gpr_replica_core_t*)ompi_list_get_next(reg)) { + if (mca_gpr_replica_check_key_list(addr_mode, trig->num_keys, trig->keys, + reg->num_keys, reg->keys)) { + trig->count++; + } + } + + /* initialize edge trigger state */ + if (OMPI_REGISTRY_SYNCHRO_MODE_NONE != trig->synch_mode) { /* looking at synchro event */ + if (trig->count > trig->trigger) { + trig->above_below = MCA_GPR_REPLICA_TRIGGER_ABOVE_LEVEL; + } else if (trig->count < trig->trigger) { + trig->above_below = 
MCA_GPR_REPLICA_TRIGGER_BELOW_LEVEL; + } else { + trig->above_below = MCA_GPR_REPLICA_TRIGGER_AT_LEVEL; + } + } + } + + + ompi_list_append(&seg->triggers, &trig->item); + + + return trig; + +} + +ompi_registry_notify_id_t +mca_gpr_replica_remove_trigger(ompi_registry_notify_id_t idtag) +{ + /* + * need to register callback to remove entry on remote notify_id_tracker + * if remote_idtag != 0 + */ + + mca_gpr_replica_notify_request_tracker_t *trackptr; + ompi_registry_notify_id_t remote_idtag; + mca_gpr_replica_segment_t *seg; + mca_gpr_replica_trigger_list_t *trig; + + /* find request on notify tracking system */ + for (trackptr = (mca_gpr_replica_notify_request_tracker_t*)ompi_list_get_first(&mca_gpr_replica_notify_request_tracker); + trackptr != (mca_gpr_replica_notify_request_tracker_t*)ompi_list_get_end(&mca_gpr_replica_notify_request_tracker); + trackptr = (mca_gpr_replica_notify_request_tracker_t*)ompi_list_get_next(trackptr)) { + + if (trackptr->local_idtag == idtag) { + /* find the trigger on the segment and remove it */ + seg = trackptr->segptr; + for (trig = (mca_gpr_replica_trigger_list_t*)ompi_list_get_first(&seg->triggers); + trig != (mca_gpr_replica_trigger_list_t*)ompi_list_get_end(&seg->triggers); + trig = (mca_gpr_replica_trigger_list_t*)ompi_list_get_next(trig)) { + if (trig->local_idtag == idtag) { + ompi_list_remove_item(&seg->triggers, &trig->item); + OBJ_RELEASE(trig); + /* save the remote_idtag so it can be returned */ + remote_idtag = trackptr->remote_idtag; + /* remove the request from the notify tracking system */ + ompi_list_remove_item(&mca_gpr_replica_notify_request_tracker, &trackptr->item); + OBJ_RELEASE(trackptr); + return remote_idtag; + } + } + } + } + return OMPI_REGISTRY_NOTIFY_ID_MAX; /* couldn't find the trigger */ +} + + +ompi_registry_notify_message_t +*mca_gpr_replica_construct_notify_message(mca_gpr_replica_segment_t *seg, + mca_gpr_replica_trigger_list_t *trig) +{ + ompi_list_t *reg_entries; + ompi_registry_value_t *reg, 
*obj; + ompi_registry_notify_message_t *msg; + mca_gpr_replica_key_t *keyptr; + char **tokptr; + int i; + + if (mca_gpr_replica_debug) { + ompi_output(0, "trigger fired on segment %s", seg->name); + } + + reg_entries = mca_gpr_replica_get_nl(trig->addr_mode, seg, trig->keys, trig->num_keys); + + msg = OBJ_NEW(ompi_registry_notify_message_t); + msg->segment = strdup(seg->name); + msg->owning_job = seg->owning_job; + msg->num_tokens = trig->num_keys; + if(0 < trig->num_keys) { + msg->tokens = (char**)malloc(trig->num_keys*(sizeof(char*))); + keyptr = trig->keys; + tokptr = msg->tokens; + for (i=0; i < (int)msg->num_tokens; i++, keyptr++, tokptr++) { + *tokptr = mca_gpr_replica_get_token(seg, *keyptr); + } + } else { + msg->tokens = NULL; + } + + while (NULL != (reg = (ompi_registry_value_t*)ompi_list_remove_first(reg_entries))) { + obj = OBJ_NEW(ompi_registry_value_t); + obj->object = (ompi_registry_object_t)malloc(reg->object_size); + memcpy(obj->object, reg->object, reg->object_size); + obj->object_size = reg->object_size; + ompi_list_append(&msg->data, &obj->item); + OBJ_RELEASE(reg); + } + + OBJ_RELEASE(reg_entries); + if (mca_gpr_replica_debug) { + ompi_output(0, "[%d,%d,%d] gpr replica-construct_notify: msg built", + OMPI_NAME_ARGS(*ompi_rte_get_self())); + } + + return msg; +} + +bool mca_gpr_replica_process_triggers(mca_gpr_replica_segment_t *seg, + mca_gpr_replica_trigger_list_t *trig, + ompi_registry_notify_message_t *message) +{ + mca_gpr_replica_notify_request_tracker_t *trackptr; + bool found; + mca_gpr_replica_callbacks_t *cb; + + if (mca_gpr_replica_debug) { + ompi_output(0, "[%d,%d,%d] gpr replica: process_trig entered", + OMPI_NAME_ARGS(*ompi_rte_get_self())); + } + + /* find corresponding notify request */ + found = false; + for (trackptr = (mca_gpr_replica_notify_request_tracker_t*)ompi_list_get_first(&mca_gpr_replica_notify_request_tracker); + trackptr != 
(mca_gpr_replica_notify_request_tracker_t*)ompi_list_get_end(&mca_gpr_replica_notify_request_tracker); + trackptr = (mca_gpr_replica_notify_request_tracker_t*)ompi_list_get_next(trackptr)) { + if (trackptr->local_idtag == trig->local_idtag) { + found = true; + break; + } + } + + if (!found) { /* didn't find request */ + ompi_output(0, "Notification error - request not found"); + /* OMPI_THREAD_UNLOCK(&mca_gpr_replica_internals_mutex); */ + return true; + } + + /* process request */ + cb = OBJ_NEW(mca_gpr_replica_callbacks_t); + if (NULL == trackptr->requestor) { /* local request - queue callback fn with their tag */ + cb->requestor = NULL; + cb->cb_func = trackptr->callback; + cb->user_tag = trackptr->user_tag; + cb->message = message; + cb->remote_idtag = OMPI_REGISTRY_NOTIFY_ID_MAX; + + } else { /* remote request - queue remote callback */ + cb->requestor = ompi_name_server.copy_process_name(trackptr->requestor); + cb->cb_func = NULL; + cb->user_tag = NULL; + cb->message = message; + cb->remote_idtag = trackptr->remote_idtag; + } + ompi_list_append(&mca_gpr_replica_callbacks, &cb->item); + + /* if one-shot, remove request from tracking system */ + if ((OMPI_REGISTRY_SYNCHRO_MODE_ONE_SHOT & trig->synch_mode) || + (OMPI_REGISTRY_NOTIFY_ONE_SHOT & trig->action)) { + ompi_list_remove_item(&mca_gpr_replica_notify_request_tracker, &trackptr->item); + OBJ_RELEASE(trackptr); + + /* ....and from the corresponding registry segment */ + ompi_list_remove_item(&seg->triggers, &trig->item); + OBJ_RELEASE(trig); + } + if (mca_gpr_replica_debug) { + ompi_output(0, "[%d,%d,%d] gpr replica-process_trig: complete", + OMPI_NAME_ARGS(*ompi_rte_get_self())); + } + + return false; + + +} + + +int mca_gpr_replica_purge_subscriptions(ompi_process_name_t *proc) +{ + mca_gpr_replica_segment_t *seg; + mca_gpr_replica_notify_request_tracker_t *trackptr, *next; + mca_gpr_replica_trigger_list_t *trig, *next_trig; + + if (NULL == proc) { /* protect against errors */ + return OMPI_ERROR; + } + + 
/* locate any notification events that have proc as the recipient
+     */
+    for (trackptr = (mca_gpr_replica_notify_request_tracker_t*)ompi_list_get_first(&mca_gpr_replica_notify_request_tracker);
+         trackptr != (mca_gpr_replica_notify_request_tracker_t*)ompi_list_get_end(&mca_gpr_replica_notify_request_tracker);) {
+        /* remember the successor now: trackptr may be unlinked and released below */
+        next = (mca_gpr_replica_notify_request_tracker_t*)ompi_list_get_next(trackptr);
+        if ((NULL != trackptr->requestor &&
+             0 == ompi_name_server.compare(OMPI_NS_CMP_ALL, proc, trackptr->requestor)) ||
+            (NULL == trackptr->requestor &&
+             0 == ompi_name_server.compare(OMPI_NS_CMP_ALL, proc, ompi_rte_get_self()))) {
+
+            /* ...find the associated subscription... */
+            if (NULL != trackptr->segptr) {
+                seg = trackptr->segptr;
+                for (trig = (mca_gpr_replica_trigger_list_t*)ompi_list_get_first(&seg->triggers);
+                     trig != (mca_gpr_replica_trigger_list_t*)ompi_list_get_end(&seg->triggers);
+                     ) {
+                    next_trig = (mca_gpr_replica_trigger_list_t*)ompi_list_get_next(trig);
+                    if (trackptr->local_idtag == trig->local_idtag) { /* found it */
+                        /* ...delete it: unlink AND release, otherwise the
+                         * trigger object is leaked once it leaves the list
+                         */
+                        ompi_list_remove_item(&seg->triggers, &trig->item);
+                        OBJ_RELEASE(trig);
+                    }
+                    trig = next_trig;
+                }
+            }
+            /* ...and delete me too!
*/
+            ompi_list_remove_item(&mca_gpr_replica_notify_request_tracker, &trackptr->item);
+            OBJ_RELEASE(trackptr);
+        }
+        trackptr = next;
+    }
+
+    return OMPI_SUCCESS;
+}
+
+
+/*
+ * Record a new notify request on the replica's request tracker.
+ *
+ * A tracker entry is created holding the segment, action, a copy of the
+ * requestor's name, the callback function and the user tag, and is appended
+ * to mca_gpr_replica_notify_request_tracker.  The entry's remote id tag is
+ * initialised to OMPI_REGISTRY_NOTIFY_ID_MAX.
+ *
+ * The local id tag is always assigned here: a recycled tag is taken from
+ * mca_gpr_replica_free_notify_id_tags when that list is non-empty, otherwise
+ * the next value of mca_gpr_replica_last_notify_id_tag is used.  The idtag
+ * argument is kept for interface compatibility but its value is not used.
+ *
+ * Returns the local id tag assigned to the request.
+ */
+ompi_registry_notify_id_t
+mca_gpr_replica_enter_notify_request(mca_gpr_replica_segment_t *seg,
+                                     ompi_registry_notify_action_t action,
+                                     ompi_process_name_t *requestor,
+                                     ompi_registry_notify_id_t idtag,
+                                     ompi_registry_notify_cb_fn_t cb_func,
+                                     void *user_tag)
+{
+    mca_gpr_replica_notify_request_tracker_t *trackptr;
+    mca_gpr_idtag_list_t *ptr_free_id;
+
+    trackptr = OBJ_NEW(mca_gpr_replica_notify_request_tracker_t);
+    trackptr->segptr = seg;
+    trackptr->action = action;
+    trackptr->requestor = ompi_name_server.copy_process_name(requestor);
+    trackptr->remote_idtag = OMPI_REGISTRY_NOTIFY_ID_MAX;
+    trackptr->callback = cb_func;
+    trackptr->user_tag = user_tag;
+    if (ompi_list_is_empty(&mca_gpr_replica_free_notify_id_tags)) {
+        trackptr->local_idtag = mca_gpr_replica_last_notify_id_tag;
+        mca_gpr_replica_last_notify_id_tag++;
+    } else {
+        ptr_free_id = (mca_gpr_idtag_list_t*)ompi_list_remove_first(&mca_gpr_replica_free_notify_id_tags);
+        trackptr->local_idtag = ptr_free_id->id_tag;
+        /* the free-list node was allocated when the tag was retired and is
+         * no longer on any list - release it so it is not leaked
+         */
+        OBJ_RELEASE(ptr_free_id);
+    }
+    ompi_list_append(&mca_gpr_replica_notify_request_tracker, &trackptr->item);
+
+    return trackptr->local_idtag;
+}
+
+
+ompi_registry_notify_id_t mca_gpr_replica_remove_notify_request(ompi_registry_notify_id_t idtag)
+{
+    mca_gpr_replica_notify_request_tracker_t *trackptr;
+    mca_gpr_idtag_list_t *ptr_free_id;
+    ompi_registry_notify_id_t remote_idtag;
+
+    /* find request on replica notify tracking system */
+    for (trackptr = (mca_gpr_replica_notify_request_tracker_t*)ompi_list_get_first(&mca_gpr_replica_notify_request_tracker);
+         trackptr != (mca_gpr_replica_notify_request_tracker_t*)ompi_list_get_end(&mca_gpr_replica_notify_request_tracker) &&
+         trackptr->local_idtag != idtag;
+         trackptr = (mca_gpr_replica_notify_request_tracker_t*)ompi_list_get_next(trackptr));
+
+    if (trackptr != 
(mca_gpr_replica_notify_request_tracker_t*)ompi_list_get_end(&mca_gpr_replica_notify_request_tracker)) { + /* save the remote idtag */ + remote_idtag = trackptr->remote_idtag; + + /* ...and remove the request */ + ompi_list_remove_item(&mca_gpr_replica_notify_request_tracker, &trackptr->item); + /* put local id tag on free list */ + ptr_free_id = OBJ_NEW(mca_gpr_idtag_list_t); + ptr_free_id->id_tag = trackptr->local_idtag; + ompi_list_append(&mca_gpr_replica_free_notify_id_tags, &ptr_free_id->item); + /* release tracker item */ + OBJ_RELEASE(trackptr); + + return remote_idtag; + } + /* error condition if reach here */ + return OMPI_REGISTRY_NOTIFY_ID_MAX; +} + +int mca_gpr_replica_check_synchros(mca_gpr_replica_segment_t *seg) +{ + mca_gpr_replica_trigger_list_t *trig; + ompi_registry_notify_message_t *notify_msg; + mca_gpr_replica_trigger_list_t* next; + bool still_valid=false; + + /* search the segment and re-compute the trigger levels */ + + /* check for trigger conditions */ + for (trig = (mca_gpr_replica_trigger_list_t*)ompi_list_get_first(&seg->triggers); + trig != (mca_gpr_replica_trigger_list_t*)ompi_list_get_end(&seg->triggers); + ) { + next = (mca_gpr_replica_trigger_list_t*)ompi_list_get_next(trig); + still_valid = true; + if (((OMPI_REGISTRY_SYNCHRO_MODE_ASCENDING & trig->synch_mode) + && (trig->count >= trig->trigger) + && (MCA_GPR_REPLICA_TRIGGER_BELOW_LEVEL == trig->above_below)) || + ((OMPI_REGISTRY_SYNCHRO_MODE_DESCENDING & trig->synch_mode) + && (trig->count <= trig->trigger) + && (MCA_GPR_REPLICA_TRIGGER_ABOVE_LEVEL == trig->above_below)) || + (OMPI_REGISTRY_SYNCHRO_MODE_LEVEL & trig->synch_mode && trig->count == trig->trigger) || + (OMPI_REGISTRY_SYNCHRO_MODE_GT_EQUAL & trig->synch_mode && trig->count >= trig->trigger)) { + + notify_msg = mca_gpr_replica_construct_notify_message(seg, trig); + notify_msg->trig_action = OMPI_REGISTRY_NOTIFY_NONE; + notify_msg->trig_synchro = trig->synch_mode; + still_valid = mca_gpr_replica_process_triggers(seg, 
trig, notify_msg); + + } + if (still_valid) { + if (trig->count > trig->trigger) { + trig->above_below = MCA_GPR_REPLICA_TRIGGER_ABOVE_LEVEL; + } else if (trig->count == trig->trigger) { + trig->above_below = MCA_GPR_REPLICA_TRIGGER_AT_LEVEL; + } + } + trig = next; + } + return OMPI_SUCCESS; +} + +void mca_gpr_replica_check_subscriptions(mca_gpr_replica_segment_t *seg, int8_t action_taken) +{ + mca_gpr_replica_trigger_list_t *trig; + ompi_registry_notify_message_t *notify_msg; + mca_gpr_replica_trigger_list_t* next; + bool still_valid=false; + + if (!seg->triggers_active) { /* triggers are not active */ + return; + } + + for (trig = (mca_gpr_replica_trigger_list_t*)ompi_list_get_first(&seg->triggers); + trig != (mca_gpr_replica_trigger_list_t*)ompi_list_get_end(&seg->triggers); + ) { + next = (mca_gpr_replica_trigger_list_t*)ompi_list_get_next(trig); + if ((OMPI_REGISTRY_NOTIFY_ALL & trig->action) || + ((OMPI_REGISTRY_NOTIFY_ADD_ENTRY & trig->action) && (MCA_GPR_REPLICA_OBJECT_ADDED == action_taken)) || + ((OMPI_REGISTRY_NOTIFY_MODIFICATION & trig->action) && (MCA_GPR_REPLICA_OBJECT_UPDATED == action_taken)) || + ((OMPI_REGISTRY_NOTIFY_DELETE_ENTRY & trig->action) && (MCA_GPR_REPLICA_OBJECT_DELETED == action_taken)) || + ((OMPI_REGISTRY_NOTIFY_ADD_SUBSCRIBER & trig->action) && (MCA_GPR_REPLICA_SUBSCRIBER_ADDED == action_taken))) { + notify_msg = mca_gpr_replica_construct_notify_message(seg, trig); + notify_msg->trig_action = trig->action; + notify_msg->trig_synchro = OMPI_REGISTRY_SYNCHRO_MODE_NONE; + still_valid = mca_gpr_replica_process_triggers(seg, trig, notify_msg); + } + if (still_valid) { + if (trig->count > trig->trigger) { + trig->above_below = MCA_GPR_REPLICA_TRIGGER_ABOVE_LEVEL; + } else if (trig->count == trig->trigger) { + trig->above_below = MCA_GPR_REPLICA_TRIGGER_AT_LEVEL; + } + } + trig = next; + } +} diff --git a/src/mca/gpr/replica/gpr_replica_messaging.c b/src/mca/gpr/replica/gpr_replica_messaging.c new file mode 100644 index 
0000000000..c6dad341e5 --- /dev/null +++ b/src/mca/gpr/replica/gpr_replica_messaging.c @@ -0,0 +1,44 @@ +/* -*- C -*- + * + * $HEADER$ + * + */ +/** @file + */ + +#include "ompi_config.h" + +#include "gpr_replica.h" + + +void mca_gpr_replica_deliver_notify_msg(ompi_registry_notify_action_t state, + ompi_registry_notify_message_t *message) +{ + int namelen; + mca_gpr_replica_notify_request_tracker_t *trackptr; + mca_gpr_replica_segment_t *seg; + + /* protect system from threadlock */ + if ((OMPI_REGISTRY_NOTIFY_ON_STARTUP & state) || + (OMPI_REGISTRY_NOTIFY_ON_SHUTDOWN & state)) { + + OMPI_THREAD_LOCK(&mca_gpr_replica_mutex); + + namelen = strlen(message->segment); + + /* find the request corresponding to this notify */ + for (trackptr = (mca_gpr_replica_notify_request_tracker_t*)ompi_list_get_first(&mca_gpr_replica_notify_request_tracker); + trackptr != (mca_gpr_replica_notify_request_tracker_t*)ompi_list_get_end(&mca_gpr_replica_notify_request_tracker); + trackptr = (mca_gpr_replica_notify_request_tracker_t*)ompi_list_get_next(trackptr)) { + seg = trackptr->segptr; + if ((trackptr->action & state) && + (0 == strncmp(message->segment, seg->name, namelen))) { + /* process request - callback function responsible for releasing memory */ + trackptr->callback(message, trackptr->user_tag); + } + } + + OMPI_THREAD_UNLOCK(&mca_gpr_replica_mutex); + } + +} diff --git a/src/mca/gpr/replica/gpr_replica_mode_ops.c b/src/mca/gpr/replica/gpr_replica_mode_ops.c new file mode 100644 index 0000000000..04e510a98d --- /dev/null +++ b/src/mca/gpr/replica/gpr_replica_mode_ops.c @@ -0,0 +1,201 @@ +/* -*- C -*- + * + * $HEADER$ + */ +/** @file: + * + * The Open MPI General Purpose Registry - Replica component + * + */ + +/* + * includes + */ +#include "ompi_config.h" + +#include "gpr_replica.h" +#include "gpr_replica_internals.h" + + +void mca_gpr_replica_silent_mode_on(void) +{ + OMPI_THREAD_LOCK(&mca_gpr_replica_mutex); + mca_gpr_replica_silent_mode = true; + 
OMPI_THREAD_UNLOCK(&mca_gpr_replica_mutex);
+}
+
+void mca_gpr_replica_silent_mode_off(void)
+{
+    OMPI_THREAD_LOCK(&mca_gpr_replica_mutex);
+    mca_gpr_replica_silent_mode = false;
+    OMPI_THREAD_UNLOCK(&mca_gpr_replica_mutex);
+}
+
+void mca_gpr_replica_notify_on(ompi_registry_notify_id_t sub_number)
+{
+    OMPI_THREAD_LOCK(&mca_gpr_replica_mutex);
+    mca_gpr_replica_notify_on_nl(ompi_rte_get_self(), sub_number);
+    OMPI_THREAD_UNLOCK(&mca_gpr_replica_mutex);
+}
+
+void mca_gpr_replica_notify_on_nl(ompi_process_name_t *proc,
+                                  ompi_registry_notify_id_t sub_number)
+{
+    mca_gpr_replica_notify_off_t *ptr, *nextptr;
+
+    for (ptr = (mca_gpr_replica_notify_off_t*)ompi_list_get_first(&mca_gpr_replica_notify_off_list);
+         ptr != (mca_gpr_replica_notify_off_t*)ompi_list_get_end(&mca_gpr_replica_notify_off_list);
+         ) {
+        nextptr = (mca_gpr_replica_notify_off_t*)ompi_list_get_next(ptr);
+        if (0 == ompi_name_server.compare(OMPI_NS_CMP_ALL, ptr->proc, proc)) {
+            if ((OMPI_REGISTRY_NOTIFY_ID_MAX == sub_number) ||
+                (ptr->sub_number == sub_number)) {
+                ompi_list_remove_item(&mca_gpr_replica_notify_off_list, &ptr->item);
+                OBJ_RELEASE(ptr);
+            }
+        }
+        ptr = nextptr;
+    }
+}
+
+
+void mca_gpr_replica_notify_off(ompi_registry_notify_id_t sub_number)
+{
+    OMPI_THREAD_LOCK(&mca_gpr_replica_mutex);
+    mca_gpr_replica_notify_off_nl(ompi_rte_get_self(), sub_number);
+    OMPI_THREAD_UNLOCK(&mca_gpr_replica_mutex);
+}
+
+
+/*
+ * Add (proc, sub_number) to mca_gpr_replica_notify_off_list.
+ * No locking is done here; mca_gpr_replica_notify_off() takes
+ * mca_gpr_replica_mutex around its call.
+ *
+ * - If sub_number is OMPI_REGISTRY_NOTIFY_ID_MAX (wild card), every existing
+ *   entry for proc is removed and released before the wild-card entry is
+ *   appended.
+ * - Otherwise, if an entry for proc with the same sub_number is already on
+ *   the list, the function returns without changing anything.
+ *
+ * The new entry stores a copy of proc made with
+ * ompi_name_server.copy_process_name().
+ */
+void mca_gpr_replica_notify_off_nl(ompi_process_name_t *proc,
+                                   ompi_registry_notify_id_t sub_number)
+{
+    mca_gpr_replica_notify_off_t *ptr, *nextptr;
+
+    /* check to see if this is already on the list - return if so */
+    for (ptr = (mca_gpr_replica_notify_off_t*)ompi_list_get_first(&mca_gpr_replica_notify_off_list);
+         ptr != (mca_gpr_replica_notify_off_t*)ompi_list_get_end(&mca_gpr_replica_notify_off_list);
+         ) {
+        /* fetch the successor before ptr can be released below; stepping
+         * from a released entry would read freed memory
+         */
+        nextptr = (mca_gpr_replica_notify_off_t*)ompi_list_get_next(ptr);
+        if (0 == ompi_name_server.compare(OMPI_NS_CMP_ALL, ptr->proc, proc)) {
+            if (OMPI_REGISTRY_NOTIFY_ID_MAX == sub_number) { /* if wild card, remove all others on list */
+                ompi_list_remove_item(&mca_gpr_replica_notify_off_list, &ptr->item);
+                OBJ_RELEASE(ptr);
+            } else if (ptr->sub_number == sub_number) {
+                return;
+            }
+        }
+        ptr = nextptr;
+    }
+
+    /* either wild card or not already on list - add it */
+    ptr = OBJ_NEW(mca_gpr_replica_notify_off_t);
+    ptr->sub_number = sub_number;
+    ptr->proc = ompi_name_server.copy_process_name(proc);
+    ompi_list_append(&mca_gpr_replica_notify_off_list, &ptr->item);
+}
+
+
+void mca_gpr_replica_triggers_active(mca_ns_base_jobid_t jobid)
+{
+
+    OMPI_THREAD_LOCK(&mca_gpr_replica_mutex);
+    mca_gpr_replica_triggers_active_nl(jobid);
+    OMPI_THREAD_UNLOCK(&mca_gpr_replica_mutex);
+}
+
+
+void mca_gpr_replica_triggers_active_nl(mca_ns_base_jobid_t jobid)
+{
+    mca_gpr_replica_segment_t *seg;
+    mca_gpr_replica_notify_off_t *ptr, *nextptr;
+
+    /* traverse the registry */
+    /* enable triggers on segments from this jobid */
+    for (seg = (mca_gpr_replica_segment_t*)ompi_list_get_first(&mca_gpr_replica_head.registry);
+         seg != (mca_gpr_replica_segment_t*)ompi_list_get_end(&mca_gpr_replica_head.registry);
+         seg = (mca_gpr_replica_segment_t*)ompi_list_get_next(seg)) {
+
+        if (seg->owning_job == jobid) {
+            seg->triggers_active = true;
+        }
+    }
+
+    /* check the list of process names with notification turned off
+     * and turn on those from this jobid
+     */
+    for (ptr = (mca_gpr_replica_notify_off_t*)ompi_list_get_first(&mca_gpr_replica_notify_off_list);
+         ptr != (mca_gpr_replica_notify_off_t*)ompi_list_get_end(&mca_gpr_replica_notify_off_list);
+         ) {
+
+        nextptr = (mca_gpr_replica_notify_off_t*)ompi_list_get_next(ptr);
+
+        if (jobid == ompi_name_server.get_jobid(ptr->proc)) {
+            ompi_list_remove_item(&mca_gpr_replica_notify_off_list, &ptr->item);
+            OBJ_RELEASE(ptr);
+        }
+        ptr = nextptr;
+    }
+}
+
+
+void mca_gpr_replica_triggers_inactive(mca_ns_base_jobid_t jobid)
+{
+    OMPI_THREAD_LOCK(&mca_gpr_replica_mutex);
+    mca_gpr_replica_triggers_inactive_nl(jobid);
+ 
OMPI_THREAD_UNLOCK(&mca_gpr_replica_mutex); +} + + +void mca_gpr_replica_triggers_inactive_nl(mca_ns_base_jobid_t jobid) +{ + mca_gpr_replica_segment_t *seg; + + /* traverse the registry */ + /* disable triggers on segments from this jobid */ + for (seg = (mca_gpr_replica_segment_t*)ompi_list_get_first(&mca_gpr_replica_head.registry); + seg != (mca_gpr_replica_segment_t*)ompi_list_get_end(&mca_gpr_replica_head.registry); + seg = (mca_gpr_replica_segment_t*)ompi_list_get_next(seg)) { + + if (seg->owning_job == jobid) { + seg->triggers_active = false; + } + } + +} + + +int mca_gpr_replica_assume_ownership(char *segment) +{ + int rc; + mca_ns_base_jobid_t jobid; + mca_gpr_replica_segment_t *seg; + + /* protect against error */ + if (NULL == segment) { + return OMPI_ERROR; + } + + OMPI_THREAD_LOCK(&mca_gpr_replica_mutex); + + jobid = ompi_name_server.get_jobid(ompi_rte_get_self()); + + /* find the segment */ + seg = mca_gpr_replica_find_seg(true, segment, jobid); + if (NULL == seg) { /* segment couldn't be found or created */ + OMPI_THREAD_UNLOCK(&mca_gpr_replica_mutex); + return OMPI_ERROR; + } + + rc = mca_gpr_replica_assume_ownership_nl(seg, jobid); + + OMPI_THREAD_UNLOCK(&mca_gpr_replica_mutex); + + return rc; +} + +int mca_gpr_replica_assume_ownership_nl(mca_gpr_replica_segment_t *seg, mca_ns_base_jobid_t jobid) +{ + + seg->owning_job = jobid; + + return OMPI_SUCCESS; +} diff --git a/src/mca/gpr/replica/gpr_replica_put_get.c b/src/mca/gpr/replica/gpr_replica_put_get.c new file mode 100644 index 0000000000..03c2b53474 --- /dev/null +++ b/src/mca/gpr/replica/gpr_replica_put_get.c @@ -0,0 +1,229 @@ +/* + * $HEADER$ + */ +/** @file: + * + * The Open MPI general purpose registry - implementation. 
+ *
+ */
+
+/*
+ * includes
+ */
+
+#include "ompi_config.h"
+
+#include "gpr_replica.h"
+#include "gpr_replica_internals.h"
+
+/*
+ * Store an object on a registry segment (locking entry point).
+ *
+ * Rejects the request with OMPI_ERROR when segment, object or tokens is
+ * NULL, when the first token is NULL, or when size is 0.  In compound
+ * command mode the request is only packed into the compound command buffer
+ * and the result of mca_gpr_base_pack_put() is returned.
+ *
+ * Otherwise the segment is looked up (and created if missing) under
+ * mca_gpr_replica_mutex, the tokens are converted to keys, and the work is
+ * done by mca_gpr_replica_put_nl().  Subscriptions and synchros on the
+ * segment are then checked, the mutex is dropped, and queued callbacks are
+ * processed.
+ *
+ * Returns OMPI_ERROR if the segment cannot be found or created, otherwise
+ * the return code of mca_gpr_replica_put_nl().
+ */
+int mca_gpr_replica_put(ompi_registry_mode_t addr_mode, char *segment,
+                        char **tokens, ompi_registry_object_t object,
+                        ompi_registry_object_size_t size)
+{
+    int rc;
+    int8_t action_taken;
+    mca_gpr_replica_segment_t *seg;
+    mca_gpr_replica_key_t *keys;
+    int num_keys;
+
+    /* protect ourselves against errors */
+    if (NULL == segment || NULL == object || 0 == size ||
+        NULL == tokens || NULL == *tokens) {
+        if (mca_gpr_replica_debug) {
+            ompi_output(0, "[%d,%d,%d] gpr replica: error in input - put rejected",
+                        OMPI_NAME_ARGS(*ompi_rte_get_self()));
+        }
+        return OMPI_ERROR;
+    }
+
+    if (mca_gpr_replica_compound_cmd_mode) {
+        return mca_gpr_base_pack_put(mca_gpr_replica_compound_cmd,
+                                     mca_gpr_replica_silent_mode,
+                                     addr_mode, segment,
+                                     tokens, object, size);
+    }
+
+    OMPI_THREAD_LOCK(&mca_gpr_replica_mutex);
+
+    /* find the segment */
+    seg = mca_gpr_replica_find_seg(true, segment,
+                                   ompi_name_server.get_jobid(ompi_rte_get_self()));
+    if (NULL == seg) { /* couldn't find segment or create it */
+        /* must not leave with the registry mutex still held */
+        OMPI_THREAD_UNLOCK(&mca_gpr_replica_mutex);
+        return OMPI_ERROR;
+    }
+
+    /* convert tokens to array of keys */
+    keys = mca_gpr_replica_get_key_list(seg, tokens, &num_keys);
+
+    rc = mca_gpr_replica_put_nl(addr_mode, seg, keys, num_keys,
+                                object, size, &action_taken);
+
+    /* put_nl only sets action_taken on the paths that return OMPI_SUCCESS,
+     * so it must not be read after a failed put
+     */
+    if (OMPI_SUCCESS == rc) {
+        mca_gpr_replica_check_subscriptions(seg, action_taken);
+    }
+
+    mca_gpr_replica_check_synchros(seg);
+
+    /* release list of keys */
+    if (NULL != keys) {
+        free(keys);
+    }
+
+    OMPI_THREAD_UNLOCK(&mca_gpr_replica_mutex);
+
+    mca_gpr_replica_process_callbacks();
+
+    return rc;
+}
+
+int mca_gpr_replica_put_nl(ompi_registry_mode_t addr_mode,
+                           mca_gpr_replica_segment_t *seg,
+                           mca_gpr_replica_key_t *keys,
+                           int num_keys, ompi_registry_object_t object,
+                           ompi_registry_object_size_t size,
+                           int8_t *action_taken)
+{
+    mca_gpr_replica_core_t *entry_ptr;
+    ompi_registry_mode_t put_mode;
+    mca_gpr_replica_trigger_list_t *trig;
+    int 
return_code; + + + if (mca_gpr_replica_debug) { + ompi_output(0, "[%d,%d,%d] gpr replica: put entered on segment %s", + OMPI_NAME_ARGS(*ompi_rte_get_self()), seg->name); + } + + /* ignore addressing mode - all tokens are used + * only overwrite permission mode flag has any affect + */ + put_mode = addr_mode & OMPI_REGISTRY_OVERWRITE; + + /* see if specified entry already exists */ + for (entry_ptr = (mca_gpr_replica_core_t*)ompi_list_get_first(&seg->registry_entries); + entry_ptr != (mca_gpr_replica_core_t*)ompi_list_get_end(&seg->registry_entries); + entry_ptr = (mca_gpr_replica_core_t*)ompi_list_get_next(entry_ptr)) { + if (mca_gpr_replica_check_key_list(put_mode, num_keys, keys, + entry_ptr->num_keys, entry_ptr->keys)) { + /* found existing entry - overwrite if mode set, else error */ + if (put_mode) { /* overwrite enabled */ + free(entry_ptr->object); + entry_ptr->object = NULL; + entry_ptr->object_size = size; + entry_ptr->object = (ompi_registry_object_t)malloc(size); + memcpy(entry_ptr->object, object, size); + return_code = OMPI_SUCCESS; + *action_taken = MCA_GPR_REPLICA_OBJECT_UPDATED; + goto CLEANUP; + } else { + return_code = OMPI_ERROR; + goto CLEANUP; + } + } + } + + /* no existing entry - create new one */ + entry_ptr = OBJ_NEW(mca_gpr_replica_core_t); + entry_ptr->keys = (mca_gpr_replica_key_t*)malloc(num_keys*sizeof(mca_gpr_replica_key_t)); + memcpy(entry_ptr->keys, keys, num_keys*sizeof(mca_gpr_replica_key_t)); + entry_ptr->num_keys = num_keys; + entry_ptr->object_size = size; + entry_ptr->object = (ompi_registry_object_t*)malloc(size); + memcpy(entry_ptr->object, object, size); + ompi_list_append(&seg->registry_entries, &entry_ptr->item); + + *action_taken = MCA_GPR_REPLICA_OBJECT_ADDED; + return_code = OMPI_SUCCESS; + + /* update trigger list */ + for (trig = (mca_gpr_replica_trigger_list_t*)ompi_list_get_first(&seg->triggers); + trig != (mca_gpr_replica_trigger_list_t*)ompi_list_get_end(&seg->triggers); + trig = 
(mca_gpr_replica_trigger_list_t*)ompi_list_get_next(trig)) {
+        if (mca_gpr_replica_check_key_list(trig->addr_mode, trig->num_keys, trig->keys,
+                                           num_keys, keys)) {
+            trig->count++;
+        }
+    }
+
+ CLEANUP:
+    if (mca_gpr_replica_debug) {
+        ompi_output(0, "[%d,%d,%d] gpr replica-put: complete", OMPI_NAME_ARGS(*ompi_rte_get_self()));
+    }
+
+    return return_code;
+}
+
+
+/*
+ * Retrieve the objects on a segment that match the given tokens (locking
+ * entry point).
+ *
+ * In compound command mode the request is only packed into the compound
+ * command buffer and NULL is returned.
+ *
+ * Otherwise the segment is looked up (never created) under
+ * mca_gpr_replica_mutex, the tokens are converted to keys, and the list
+ * built by mca_gpr_replica_get_nl() is returned.
+ *
+ * When segment is NULL or the segment does not exist, a newly allocated
+ * empty list is returned.
+ * NOTE(review): an empty list (rather than NULL) is chosen because callers
+ * such as ompi_attr_create_predefined() pass the result straight to
+ * ompi_list_get_size() - confirm this is the contract wanted for all callers.
+ */
+ompi_list_t* mca_gpr_replica_get(ompi_registry_mode_t addr_mode,
+                                 char *segment, char **tokens)
+{
+    ompi_list_t* list;
+    mca_gpr_replica_segment_t *seg;
+    mca_gpr_replica_key_t *keys;
+    int num_keys;
+
+    /* protect against errors - never hand back an indeterminate pointer */
+    if (NULL == segment) {
+        return OBJ_NEW(ompi_list_t);
+    }
+
+    if (mca_gpr_replica_compound_cmd_mode) {
+        mca_gpr_base_pack_get(mca_gpr_replica_compound_cmd, addr_mode, segment, tokens);
+        return NULL;
+    }
+
+    OMPI_THREAD_LOCK(&mca_gpr_replica_mutex);
+
+    /* find the specified segment */
+    seg = mca_gpr_replica_find_seg(false, segment, MCA_NS_BASE_JOBID_MAX);
+    if (NULL == seg) { /* segment not found */
+        /* must not leave with the registry mutex still held */
+        OMPI_THREAD_UNLOCK(&mca_gpr_replica_mutex);
+        return OBJ_NEW(ompi_list_t);
+    }
+
+    /* convert tokens to array of keys */
+    keys = mca_gpr_replica_get_key_list(seg, tokens, &num_keys);
+
+    list = mca_gpr_replica_get_nl(addr_mode, seg, keys, num_keys);
+
+    if (NULL != keys) {
+        free(keys);
+    }
+
+    OMPI_THREAD_UNLOCK(&mca_gpr_replica_mutex);
+    return list;
+}
+
+ompi_list_t* mca_gpr_replica_get_nl(ompi_registry_mode_t addr_mode,
+                                    mca_gpr_replica_segment_t *seg,
+                                    mca_gpr_replica_key_t *keys,
+                                    int num_keys)
+{
+    ompi_list_t *answer=NULL;
+    ompi_registry_value_t *ans=NULL;
+    mca_gpr_replica_core_t *reg=NULL;
+
+    if (mca_gpr_replica_debug) {
+        ompi_output(0, "[%d,%d,%d] gpr replica: get entered", OMPI_NAME_ARGS(*ompi_rte_get_self()));
+    }
+
+    answer = OBJ_NEW(ompi_list_t);
+
+    /* traverse the segment's registry, looking for matching tokens per the specified mode */
+    for (reg = (mca_gpr_replica_core_t*)ompi_list_get_first(&seg->registry_entries);
+         reg != (mca_gpr_replica_core_t*)ompi_list_get_end(&seg->registry_entries);
+         reg = 
(mca_gpr_replica_core_t*)ompi_list_get_next(reg)) { + + /* for each registry entry, check the key list */ + if (mca_gpr_replica_check_key_list(addr_mode, num_keys, keys, + reg->num_keys, reg->keys)) { /* found the key(s) on the list */ + ans = OBJ_NEW(ompi_registry_value_t); + ans->object_size = reg->object_size; + ans->object = (ompi_registry_object_t*)malloc(ans->object_size); + memcpy(ans->object, reg->object, ans->object_size); + ompi_list_append(answer, &ans->item); + } + } + if (mca_gpr_replica_debug) { + ompi_output(0, "[%d,%d,%d] gpr replica-get: finished search", OMPI_NAME_ARGS(*ompi_rte_get_self())); + } + + return answer; +} diff --git a/src/mca/gpr/replica/gpr_replica_recv_proxy_msgs.c b/src/mca/gpr/replica/gpr_replica_recv_proxy_msgs.c new file mode 100644 index 0000000000..a322655830 --- /dev/null +++ b/src/mca/gpr/replica/gpr_replica_recv_proxy_msgs.c @@ -0,0 +1,1346 @@ +/* -*- C -*- + * + * $HEADER$ + */ +/** @file: + * + * The Open MPI General Purpose Registry - Replica component + * + */ + +/* + * includes + */ +#include "ompi_config.h" + +#include "gpr_replica.h" +#include "gpr_replica_internals.h" + +/* + * define the local functions for processing commands + */ +static int32_t mca_gpr_replica_recv_delete_segment_cmd(ompi_buffer_t buffer); +static int32_t mca_gpr_replica_recv_put_cmd(ompi_buffer_t buffer); +static ompi_list_t* mca_gpr_replica_recv_get_cmd(ompi_buffer_t buffer); +static int32_t mca_gpr_replica_recv_delete_object_cmd(ompi_buffer_t buffer); +static ompi_list_t* mca_gpr_replica_recv_index_cmd(ompi_buffer_t buffer); +static ompi_registry_notify_id_t mca_gpr_replica_recv_subscribe_cmd(ompi_process_name_t* sender, ompi_buffer_t buffer); +static int32_t mca_gpr_replica_recv_unsubscribe_cmd(ompi_buffer_t buffer); +static ompi_registry_notify_id_t mca_gpr_replica_recv_synchro_cmd(ompi_process_name_t* sender, ompi_buffer_t buffer); +static int32_t mca_gpr_replica_recv_cancel_synchro_cmd(ompi_buffer_t buffer); +static void 
mca_gpr_replica_recv_dump_cmd(ompi_buffer_t answer);
+static void mca_gpr_replica_recv_get_startup_msg_cmd(ompi_buffer_t buffer, ompi_buffer_t answer);
+static void mca_gpr_replica_recv_get_shutdown_msg_cmd(ompi_buffer_t buffer, ompi_buffer_t answer);
+static void mca_gpr_replica_recv_triggers_active_cmd(ompi_buffer_t buffer);
+static void mca_gpr_replica_recv_triggers_inactive_cmd(ompi_buffer_t buffer);
+static void mca_gpr_replica_recv_cleanup_job_cmd(ompi_buffer_t buffer);
+static void mca_gpr_replica_recv_cleanup_proc_cmd(ompi_buffer_t buffer);
+static void mca_gpr_replica_recv_notify_on_cmd(ompi_buffer_t buffer);
+static void mca_gpr_replica_recv_notify_off_cmd(ompi_buffer_t buffer);
+static int32_t mca_gpr_replica_recv_assume_ownership_cmd(ompi_buffer_t buffer);
+
+static bool mca_gpr_replica_recv_silent_mode(ompi_buffer_t buffer);
+
+
+/*
+ * handle message from proxies
+ *
+ * Runs the received buffer through mca_gpr_replica_process_command_buffer,
+ * sends the answer back to the sender when one is called for, processes
+ * pending callbacks and finally re-posts the non-blocking receive.
+ */
+
+void mca_gpr_replica_recv(int status, ompi_process_name_t* sender,
+                          ompi_buffer_t buffer, int tag,
+                          void* cbdata)
+{
+    ompi_buffer_t answer;
+    size_t buf_size=0;
+    bool return_requested;
+    bool compound_cmd_detected;
+
+    if (mca_gpr_replica_debug) {
+        ompi_output(0, "gpr replica: received message");
+    }
+
+    return_requested = true;
+    compound_cmd_detected = false;
+
+    if (NULL != (answer = mca_gpr_replica_process_command_buffer(buffer, sender,
+                                                                 &return_requested,
+                                                                 &compound_cmd_detected))) {
+
+        ompi_buffer_size(answer, &buf_size);
+
+        if ((compound_cmd_detected && return_requested) ||
+            (!compound_cmd_detected && 0 < buf_size)) { /* must be some data or status codes to return */
+            if (0 > mca_oob_send_packed(sender, answer, tag, 0)) {
+                /* RHC -- not sure what to do if the return send fails */
+            }
+        }
+
+        ompi_buffer_free(answer);
+        if (mca_gpr_replica_debug) {
+            ompi_output(0, "gpr replica: msg processing complete - processing callbacks");
+        }
+
+        mca_gpr_replica_process_callbacks();
+    }
+
+    /* reissue the non-blocking receive */
+    mca_oob_recv_packed_nb(MCA_OOB_NAME_ANY, MCA_OOB_TAG_GPR, 0, mca_gpr_replica_recv, NULL);
+}
+
+
+/*
+ * Pack the standard two-field reply: the command flag followed by an int32
+ * (a status code or an element count).  Returns OMPI_SUCCESS or OMPI_ERROR.
+ */
+static int mca_gpr_replica_pack_response(ompi_buffer_t answer,
+                                         mca_gpr_cmd_flag_t command,
+                                         int32_t response)
+{
+    if (OMPI_SUCCESS != ompi_pack(answer, &command, 1, MCA_GPR_OOB_PACK_CMD)) {
+        return OMPI_ERROR;
+    }
+    if (OMPI_SUCCESS != ompi_pack(answer, &response, 1, OMPI_INT32)) {
+        return OMPI_ERROR;
+    }
+    return OMPI_SUCCESS;
+}
+
+/*
+ * Release a result list once its contents have been packed: every item is
+ * removed and released, then the list itself.  NULL is accepted.
+ * NOTE(review): assumes the dispatcher is the sole owner of the lists handed
+ * back by the get / index / test-internals routines - confirm.
+ */
+static void mca_gpr_replica_release_list(ompi_list_t *list)
+{
+    ompi_list_item_t *item;
+
+    if (NULL == list) {
+        return;
+    }
+    while (NULL != (item = ompi_list_remove_first(list))) {
+        OBJ_RELEASE(item);
+    }
+    OBJ_RELEASE(list);
+}
+
+
+/*
+ * Unpack commands from buffer one after another and execute each of them,
+ * collecting the replies in a newly initialized answer buffer.
+ *
+ * buffer                 packed command stream
+ * sender                 forwarded to the subscribe and synchro handlers
+ * return_requested       set from the flag carried by a compound command
+ * compound_cmd_detected  set to true when a compound command is seen
+ *
+ * Returns the answer buffer; NULL if it could not be initialized; or, on any
+ * failure or unrecognized command, a separate buffer that carries only
+ * MCA_GPR_ERROR (the partial answer is freed).
+ */
+ompi_buffer_t mca_gpr_replica_process_command_buffer(ompi_buffer_t buffer,
+                                                     ompi_process_name_t *sender,
+                                                     bool *return_requested,
+                                                     bool *compound_cmd_detected)
+{
+    ompi_buffer_t answer, error_answer;
+    ompi_registry_value_t *regval=NULL;
+    ompi_list_t *returned_list=NULL;
+    ompi_registry_internal_test_results_t *testval=NULL;
+    ompi_registry_index_value_t *indexval=NULL;
+    ompi_registry_notify_id_t return_tag=OMPI_REGISTRY_NOTIFY_ID_MAX;
+    int32_t test_level=0;
+    mca_gpr_cmd_flag_t command;
+    int32_t response=0;
+    int8_t tmp_bool;
+
+
+    if (OMPI_SUCCESS != ompi_buffer_init(&answer, 0)) {
+        /* RHC -- not sure what to do if this fails */
+        return NULL;
+    }
+
+    while (OMPI_SUCCESS == ompi_unpack(buffer, &command, 1, MCA_GPR_OOB_PACK_CMD)) {
+
+        switch(command) {
+
+        case MCA_GPR_COMPOUND_CMD:  /*****     COMPOUND COMMAND     ******/
+            if (mca_gpr_replica_debug) {
+                ompi_output(0, "\tcompound cmd");
+            }
+
+            /* any non-success code leaves tmp_bool unset, so test for success */
+            if (OMPI_SUCCESS != ompi_unpack(buffer, &tmp_bool, 1, MCA_GPR_OOB_PACK_BOOL)) {
+                goto RETURN_ERROR;
+            }
+
+            *return_requested = (bool)tmp_bool;
+            *compound_cmd_detected = true;
+            break;
+
+
+        case MCA_GPR_DELETE_SEGMENT_CMD:  /******    DELETE SEGMENT    *****/
+
+            if (mca_gpr_replica_debug) {
+                ompi_output(0, "\tdelete segment cmd");
+            }
+
+            tmp_bool = mca_gpr_replica_recv_silent_mode(buffer);
+
+            if (OMPI_ERROR == (response = mca_gpr_replica_recv_delete_segment_cmd(buffer))) {
+                goto RETURN_ERROR;
+            }
+
+            if (!tmp_bool &&
+                OMPI_SUCCESS != mca_gpr_replica_pack_response(answer, command, response)) {
+                goto RETURN_ERROR;
+            }
+            break;
+
+
+        case MCA_GPR_PUT_CMD:  /*****    PUT    *****/
+
+            if (mca_gpr_replica_debug) {
+                ompi_output(0, "\tput cmd");
+            }
+
+            tmp_bool = mca_gpr_replica_recv_silent_mode(buffer);
+
+            if (OMPI_ERROR == (response = mca_gpr_replica_recv_put_cmd(buffer))) {
+                goto RETURN_ERROR;
+            }
+
+            if (!tmp_bool &&
+                OMPI_SUCCESS != mca_gpr_replica_pack_response(answer, command, response)) {
+                goto RETURN_ERROR;
+            }
+            break;
+
+
+        case MCA_GPR_GET_CMD:  /*****    GET    *****/
+
+            if (mca_gpr_replica_debug) {
+                ompi_output(0, "\tget cmd");
+            }
+
+            if (NULL == (returned_list = mca_gpr_replica_recv_get_cmd(buffer))) {
+                goto RETURN_ERROR;
+            }
+
+            response = (int32_t)ompi_list_get_size(returned_list);
+            if (OMPI_SUCCESS != mca_gpr_replica_pack_response(answer, command, response)) {
+                goto RETURN_ERROR;
+            }
+
+            if (0 < response) {  /* don't send anything else back if the list is empty */
+                for (regval = (ompi_registry_value_t*)ompi_list_get_first(returned_list);
+                     regval != (ompi_registry_value_t*)ompi_list_get_end(returned_list);
+                     regval = (ompi_registry_value_t*)ompi_list_get_next(regval)) {  /* traverse the list */
+                    if (OMPI_SUCCESS != ompi_pack(answer, &regval->object_size, 1, MCA_GPR_OOB_PACK_OBJECT_SIZE)) {
+                        goto RETURN_ERROR;
+                    }
+                    if (OMPI_SUCCESS != ompi_pack(answer, regval->object, regval->object_size, OMPI_BYTE)) {
+                        goto RETURN_ERROR;
+                    }
+                }
+            }
+
+            /* contents are now in the answer buffer - the list is no longer needed */
+            mca_gpr_replica_release_list(returned_list);
+            returned_list = NULL;
+            break;
+
+
+        case MCA_GPR_DELETE_OBJECT_CMD:  /*****     DELETE OBJECT     *****/
+
+            if (mca_gpr_replica_debug) {
+                ompi_output(0, "\tdelete object cmd");
+            }
+
+            tmp_bool = mca_gpr_replica_recv_silent_mode(buffer);
+
+            if (OMPI_ERROR == (response = mca_gpr_replica_recv_delete_object_cmd(buffer))) {
+                goto RETURN_ERROR;
+            }
+
+            if (!tmp_bool &&
+                OMPI_SUCCESS != mca_gpr_replica_pack_response(answer, command, response)) {
+                goto RETURN_ERROR;
+            }
+            break;
+
+
+        case MCA_GPR_INDEX_CMD:  /*****     INDEX     *****/
+
+            if (mca_gpr_replica_debug) {
+                ompi_output(0, "\tindex cmd");
+            }
+
+            if (NULL == (returned_list = mca_gpr_replica_recv_index_cmd(buffer))) {
+                goto RETURN_ERROR;
+            }
+
+            response = (int32_t)ompi_list_get_size(returned_list);
+            if (OMPI_SUCCESS != mca_gpr_replica_pack_response(answer, command, response)) {
+                goto RETURN_ERROR;
+            }
+
+            if (0 < response) {  /* don't send anything else back if the list is empty */
+                for (indexval = (ompi_registry_index_value_t*)ompi_list_get_first(returned_list);
+                     indexval != (ompi_registry_index_value_t*)ompi_list_get_end(returned_list);
+                     indexval = (ompi_registry_index_value_t*)ompi_list_get_next(indexval)) {  /* traverse the list */
+                    if (OMPI_SUCCESS != ompi_pack_string(answer, indexval->token)) {
+                        goto RETURN_ERROR;
+                    }
+                }
+            }
+
+            mca_gpr_replica_release_list(returned_list);
+            returned_list = NULL;
+            break;
+
+
+        case MCA_GPR_SUBSCRIBE_CMD:  /*****     SUBSCRIBE     *****/
+
+            if (mca_gpr_replica_debug) {
+                ompi_output(0, "\tsubscribe cmd");
+            }
+
+            if (OMPI_REGISTRY_NOTIFY_ID_MAX == (return_tag = mca_gpr_replica_recv_subscribe_cmd(sender, buffer))) {
+                goto RETURN_ERROR;
+            }
+
+            if (OMPI_SUCCESS != ompi_pack(answer, &command, 1, MCA_GPR_OOB_PACK_CMD)) {
+                goto RETURN_ERROR;
+            }
+
+            if (OMPI_SUCCESS != ompi_pack(answer, &return_tag, 1, MCA_GPR_OOB_PACK_NOTIFY_ID)) {
+                goto RETURN_ERROR;
+            }
+            break;
+
+
+        case MCA_GPR_UNSUBSCRIBE_CMD:  /*****     UNSUBSCRIBE     *****/
+
+            if (mca_gpr_replica_debug) {
+                ompi_output(0, "\tunsubscribe cmd");
+            }
+
+            tmp_bool = mca_gpr_replica_recv_silent_mode(buffer);
+
+            if (OMPI_ERROR == (response = mca_gpr_replica_recv_unsubscribe_cmd(buffer))) {
+                goto RETURN_ERROR;
+            }
+
+            if (!tmp_bool &&
+                OMPI_SUCCESS != mca_gpr_replica_pack_response(answer, command, response)) {
+                goto RETURN_ERROR;
+            }
+            break;
+
+
+
+        case MCA_GPR_SYNCHRO_CMD:  /*****     SYNCHRO     *****/
+
+            if (mca_gpr_replica_debug) {
+                ompi_output(0, "\tsynchro cmd");
+            }
+
+            if (OMPI_REGISTRY_NOTIFY_ID_MAX == (return_tag = mca_gpr_replica_recv_synchro_cmd(sender, buffer))) {
+                goto RETURN_ERROR;
+            }
+
+            if (OMPI_SUCCESS != ompi_pack(answer, &command, 1, MCA_GPR_OOB_PACK_CMD)) {
+                goto RETURN_ERROR;
+            }
+
+            if (OMPI_SUCCESS != ompi_pack(answer, &return_tag, 1, MCA_GPR_OOB_PACK_NOTIFY_ID)) {
+                goto RETURN_ERROR;
+            }
+            break;
+
+
+
+        case MCA_GPR_CANCEL_SYNCHRO_CMD:  /*****     CANCEL SYNCHRO     *****/
+
+            if (mca_gpr_replica_debug) {
+                ompi_output(0, "\tcancel synchro cmd");
+            }
+
+            tmp_bool = mca_gpr_replica_recv_silent_mode(buffer);
+
+            if (OMPI_ERROR == (response = mca_gpr_replica_recv_cancel_synchro_cmd(buffer))) {
+                goto RETURN_ERROR;
+            }
+
+            if (!tmp_bool &&
+                OMPI_SUCCESS != mca_gpr_replica_pack_response(answer, command, response)) {
+                goto RETURN_ERROR;
+            }
+            break;
+
+
+
+        case MCA_GPR_DUMP_CMD:  /*****     DUMP     *****/
+
+            if (mca_gpr_replica_debug) {
+                ompi_output(0, "\tdump cmd");
+            }
+
+            if (OMPI_SUCCESS != ompi_pack(answer, &command, 1, MCA_GPR_OOB_PACK_CMD)) {
+                goto RETURN_ERROR;
+            }
+
+            mca_gpr_replica_recv_dump_cmd(answer);
+            break;
+
+
+
+        case MCA_GPR_GET_STARTUP_MSG_CMD:  /*****     GET STARTUP MSG    *****/
+
+            if (mca_gpr_replica_debug) {
+                ompi_output(0, "\tget startup msg cmd");
+            }
+
+            mca_gpr_replica_recv_get_startup_msg_cmd(buffer, answer);
+            break;
+
+
+
+        case MCA_GPR_NOTIFY_ON_CMD:  /*****     NOTIFY ON     *****/
+
+            if (mca_gpr_replica_debug) {
+                ompi_output(0, "\tnotify on cmd");
+            }
+
+            mca_gpr_replica_recv_notify_on_cmd(buffer);
+            break;
+
+
+
+        case MCA_GPR_NOTIFY_OFF_CMD:  /*****     NOTIFY OFF     ******/
+
+            if (mca_gpr_replica_debug) {
+                ompi_output(0, "\tnotify off cmd");
+            }
+
+            mca_gpr_replica_recv_notify_off_cmd(buffer);
+            break;
+
+
+
+        case MCA_GPR_GET_SHUTDOWN_MSG_CMD:  /*****     GET SHUTDOWN MSG    *****/
+
+            if (mca_gpr_replica_debug) {
+                ompi_output(0, "\tget shutdown msg cmd");
+            }
+
+            mca_gpr_replica_recv_get_shutdown_msg_cmd(buffer, answer);
+            break;
+
+
+
+        case MCA_GPR_TRIGGERS_ACTIVE_CMD:  /*****     TRIGGERS ACTIVE    *****/
+
+            if (mca_gpr_replica_debug) {
+                ompi_output(0, "\ttriggers active cmd");
+            }
+
+            mca_gpr_replica_recv_triggers_active_cmd(buffer);
+            break;
+
+
+
+        case MCA_GPR_TRIGGERS_INACTIVE_CMD:  /*****     TRIGGERS INACTIVE    *****/
+
+            if (mca_gpr_replica_debug) {
+                ompi_output(0, "\ttriggers inactive cmd");
+            }
+
+            mca_gpr_replica_recv_triggers_inactive_cmd(buffer);
+            break;
+
+
+
+        case MCA_GPR_CLEANUP_JOB_CMD:  /*****     CLEANUP JOB    *****/
+
+            if (mca_gpr_replica_debug) {
+                ompi_output(0, "\tcleanup job cmd");
+            }
+
+            mca_gpr_replica_recv_cleanup_job_cmd(buffer);
+            break;
+
+
+
+        case MCA_GPR_CLEANUP_PROC_CMD:  /*****     CLEANUP PROCESS    *****/
+
+            if (mca_gpr_replica_debug) {
+                ompi_output(0, "\tcleanup proc cmd");
+            }
+
+            mca_gpr_replica_recv_cleanup_proc_cmd(buffer);
+            break;
+
+
+
+        case MCA_GPR_ASSUME_OWNERSHIP_CMD:  /*****     ASSUME OWNERSHIP    *****/
+
+            if (mca_gpr_replica_debug) {
+                ompi_output(0, "\tassume ownership command");
+            }
+
+            tmp_bool = mca_gpr_replica_recv_silent_mode(buffer);
+
+            if (OMPI_ERROR == (response = mca_gpr_replica_recv_assume_ownership_cmd(buffer))) {
+                goto RETURN_ERROR;
+            }
+
+            if (!tmp_bool &&
+                OMPI_SUCCESS != mca_gpr_replica_pack_response(answer, command, response)) {
+                goto RETURN_ERROR;
+            }
+            break;
+
+
+
+        case MCA_GPR_TEST_INTERNALS_CMD:  /*****     TEST INTERNALS     *****/
+
+
+            if ((OMPI_SUCCESS != ompi_unpack(buffer, &test_level, 1, OMPI_INT32)) ||
+                (0 > test_level)) {
+                goto RETURN_ERROR;
+            }
+
+            /* same NULL guard as the get and index branches */
+            if (NULL == (returned_list = mca_gpr_replica_test_internals(test_level))) {
+                goto RETURN_ERROR;
+            }
+
+            response = (int32_t)ompi_list_get_size(returned_list);
+            if (OMPI_SUCCESS != mca_gpr_replica_pack_response(answer, command, response)) {
+                goto RETURN_ERROR;
+            }
+
+            if (0 < response) {  /* don't send anything else back if the list is empty */
+                for (testval = (ompi_registry_internal_test_results_t*)ompi_list_get_first(returned_list);
+                     testval != (ompi_registry_internal_test_results_t*)ompi_list_get_end(returned_list);
+                     testval = (ompi_registry_internal_test_results_t*)ompi_list_get_next(testval)) {  /* traverse the list */
+                    if (OMPI_SUCCESS != ompi_pack_string(answer, testval->test)) {
+                        goto RETURN_ERROR;
+                    }
+                    if (OMPI_SUCCESS != ompi_pack_string(answer, testval->message)) {
+                        goto RETURN_ERROR;
+                    }
+                }
+            }
+
+            mca_gpr_replica_release_list(returned_list);
+            returned_list = NULL;
+            break;
+
+
+        default:  /****    UNRECOGNIZED COMMAND   ****/
+        RETURN_ERROR:
+            /* shared exit for unrecognized commands and for every failure above */
+            if (mca_gpr_replica_debug) {
+                ompi_output(0, "unrecognized command");
+            }
+            /* a list may still be pending if packing it failed part way */
+            mca_gpr_replica_release_list(returned_list);
+            ompi_buffer_init(&error_answer, 8);
+            command = MCA_GPR_ERROR;
+            ompi_pack(error_answer, (void*)&command, 1, MCA_GPR_OOB_PACK_CMD);
+            ompi_buffer_free(answer);
+            return error_answer;
+
+        }  /* end switch command */
+
+    }
+
+    return answer;
+}
+
+/*
+ * Unpack a segment name and delete that segment.
+ *
+ * Returns OMPI_SUCCESS after the segment was handed to
+ * mca_gpr_replica_delete_segment_nl, OMPI_ERROR if the name could not be
+ * unpacked or no such segment exists.
+ */
+static int32_t mca_gpr_replica_recv_delete_segment_cmd(ompi_buffer_t buffer)
+{
+    char *segment=NULL;
+    mca_gpr_replica_segment_t *seg;
+    int32_t rc=(int32_t)OMPI_ERROR;
+
+    if (0 > ompi_unpack_string(buffer, &segment)) {
+        return OMPI_ERROR;
+    }
+
+    OMPI_THREAD_LOCK(&mca_gpr_replica_mutex);
+
+    seg = mca_gpr_replica_find_seg(false, segment, MCA_NS_BASE_JOBID_MAX);
+
+    /* never hand a NULL segment to the delete routine */
+    if (NULL != seg) {
+        mca_gpr_replica_delete_segment_nl(seg);
+        rc = (int32_t)OMPI_SUCCESS;
+    }
+
+    OMPI_THREAD_UNLOCK(&mca_gpr_replica_mutex);
+
+    /* the unpacked name belongs to this function */
+    if (NULL != segment) {
+        free(segment);
+    }
+
+    return rc;
+}
+
+static int32_t mca_gpr_replica_recv_put_cmd(ompi_buffer_t buffer)
+{
+    ompi_registry_mode_t mode;
+    char *segment=NULL, **tokens=NULL, **tokptr=NULL;
+    int32_t num_tokens, response=(int32_t)OMPI_ERROR;
+    ompi_registry_object_size_t object_size;
+    ompi_registry_object_t *object=NULL;
+    mca_gpr_replica_segment_t *seg;
+    mca_gpr_replica_key_t *keys;
+    mca_ns_base_jobid_t jobid;
+    int num_keys;
+    int8_t action_taken;
+    int i;
+
+    if (OMPI_SUCCESS != ompi_unpack(buffer, &mode, 1, MCA_GPR_OOB_PACK_MODE)) {
+        goto RETURN_ERROR;
+    }
+
+    if (0 > ompi_unpack_string(buffer, &segment)) {
+        goto RETURN_ERROR;
+    }
+
+    if (OMPI_SUCCESS != ompi_unpack(buffer, &jobid, 1,
MCA_GPR_OOB_PACK_JOBID)) { + return OMPI_ERROR; + } + + if (OMPI_SUCCESS != ompi_unpack(buffer, &num_tokens, 1, OMPI_INT32)) { + goto RETURN_ERROR; + } + + if (0 >= num_tokens) { /** no tokens provided - error for PUT */ + goto RETURN_ERROR; + } + + tokens = (char**)malloc((num_tokens+1)*sizeof(char*)); + + tokptr = tokens; + for (i=0; i ompi_unpack_string(buffer, tokptr)) { + goto RETURN_ERROR; + } + tokptr++; + } + *tokptr = NULL; + + if (OMPI_SUCCESS != ompi_unpack(buffer, &object_size, 1, MCA_GPR_OOB_PACK_OBJECT_SIZE)) { + goto RETURN_ERROR; + } + + if (0 >= object_size) { /* error condition - nothing to store */ + goto RETURN_ERROR; + } + + object = (ompi_registry_object_t)malloc(object_size); + if (OMPI_SUCCESS != ompi_unpack(buffer, object, object_size, OMPI_BYTE)) { + goto RETURN_ERROR; + } + + OMPI_THREAD_LOCK(&mca_gpr_replica_mutex); + + /* find the segment */ + seg = mca_gpr_replica_find_seg(true, segment, jobid); + if (NULL == seg) { /* couldn't find segment or create it */ + OMPI_THREAD_UNLOCK(&mca_gpr_replica_mutex); + goto RETURN_ERROR; + } + + /* convert tokens to array of keys */ + keys = mca_gpr_replica_get_key_list(seg, tokens, &num_keys); + + response = (int32_t)mca_gpr_replica_put_nl(mode, seg, keys, num_keys, + object, object_size, &action_taken); + + mca_gpr_replica_check_subscriptions(seg, action_taken); + + mca_gpr_replica_check_synchros(seg); + + /* release list of keys */ + if (NULL != keys) { + free(keys); + } + + OMPI_THREAD_UNLOCK(&mca_gpr_replica_mutex); + + RETURN_ERROR: + if (NULL != segment) { + free(segment); + } + if (NULL != object) { + free(object); + } + if (NULL != tokens) { + tokptr = tokens; + for (i=0; i ompi_unpack_string(buffer, &segment)) { + return NULL; + } + + if (OMPI_SUCCESS != ompi_unpack(buffer, &num_tokens, 1, OMPI_INT32)) { + goto RETURN_ERROR; + } + + if (0 >= num_tokens) { /* no tokens provided - wildcard case */ + tokens = NULL; + } else { /* tokens provided */ + tokens = 
(char**)malloc((num_tokens+1)*sizeof(char*)); + tokptr = tokens; + for (i=0; i ompi_unpack_string(buffer, tokptr)) { + goto RETURN_ERROR; + } + tokptr++; + } + *tokptr = NULL; + } + + OMPI_THREAD_LOCK(&mca_gpr_replica_mutex); + + /* find the specified segment */ + seg = mca_gpr_replica_find_seg(false, segment, MCA_NS_BASE_JOBID_MAX); + if (NULL == seg) { /* segment not found */ + OMPI_THREAD_UNLOCK(&mca_gpr_replica_mutex); + return returned_list; + } + + /* convert tokens to array of keys */ + keys = mca_gpr_replica_get_key_list(seg, tokens, &num_keys); + + returned_list = mca_gpr_replica_get_nl(mode, seg, keys, num_keys); + + if (NULL != keys) { + free(keys); + } + + OMPI_THREAD_UNLOCK(&mca_gpr_replica_mutex); + + RETURN_ERROR: + if (NULL != segment) { + free(segment); + } + if (NULL != tokens) { + tokptr = tokens; + for (i=0; i ompi_unpack_string(buffer, &segment)) { + goto RETURN_ERROR; + } + + if (OMPI_SUCCESS != ompi_unpack(buffer, &num_tokens, 1, OMPI_INT32)) { + goto RETURN_ERROR; + } + + if (0 >= num_tokens) { /* no tokens provided - wildcard case */ + tokens = NULL; + } else { /* tokens provided */ + tokens = (char**)malloc((num_tokens+1)*sizeof(char*)); + tokptr = tokens; + for (i=0; i ompi_unpack_string(buffer, tokptr)) { + goto RETURN_ERROR; + } + tokptr++; + } + *tokptr = NULL; + } + + OMPI_THREAD_LOCK(&mca_gpr_replica_mutex); + + /* locate the segment */ + seg = mca_gpr_replica_find_seg(false, segment, ompi_name_server.get_jobid(ompi_rte_get_self())); + if (NULL == seg) { + OMPI_THREAD_UNLOCK(&mca_gpr_replica_mutex); + return OMPI_ERROR; + } + + keys = mca_gpr_replica_get_key_list(seg, tokens, &num_keys); + + response = (int32_t)mca_gpr_replica_delete_object_nl(mode, seg, keys, num_keys); + + mca_gpr_replica_check_subscriptions(seg, MCA_GPR_REPLICA_OBJECT_DELETED); + + mca_gpr_replica_check_synchros(seg); + + OMPI_THREAD_UNLOCK(&mca_gpr_replica_mutex); + + RETURN_ERROR: + if (NULL != segment) { + free(segment); + } + if (NULL != tokens) { + tokptr = 
tokens; + for (i=0; i ompi_unpack_string(buffer, &segment)) { + goto RETURN_ERROR; + } + } + + OMPI_THREAD_LOCK(&mca_gpr_replica_mutex); + + if (NULL == segment) { /* want global level index */ + seg = NULL; + } else { + /* locate the segment */ + seg = mca_gpr_replica_find_seg(false, segment, MCA_NS_BASE_JOBID_MAX); + if (NULL == seg) { + OMPI_THREAD_UNLOCK(&mca_gpr_replica_mutex); + return NULL; + } + } + + returned_list = mca_gpr_replica_index_nl(seg); + + OMPI_THREAD_UNLOCK(&mca_gpr_replica_mutex); + + RETURN_ERROR: + if (NULL != segment) { + free(segment); + } + return returned_list; +} + +static ompi_registry_notify_id_t mca_gpr_replica_recv_subscribe_cmd(ompi_process_name_t* sender, + ompi_buffer_t buffer) +{ + ompi_registry_mode_t mode; + char *segment=NULL, **tokens=NULL, **tokptr=NULL; + ompi_registry_notify_action_t action; + ompi_registry_notify_id_t local_idtag1, id_tag, return_tag; + mca_gpr_replica_segment_t *seg; + mca_gpr_replica_key_t *keys; + int num_keys; + int32_t num_tokens, response=(int32_t)OMPI_ERROR; + int i; + + return_tag = OMPI_REGISTRY_NOTIFY_ID_MAX; + + if (OMPI_SUCCESS != ompi_unpack(buffer, &mode, 1, MCA_GPR_OOB_PACK_MODE)) { + goto RETURN_ERROR; + } + + if (OMPI_SUCCESS != ompi_unpack(buffer, &action, 1, MCA_GPR_OOB_PACK_ACTION)) { + goto RETURN_ERROR; + } + + if (0 > ompi_unpack_string(buffer, &segment)) { + goto RETURN_ERROR; + } + + if (OMPI_SUCCESS != ompi_unpack(buffer, &num_tokens, 1, OMPI_INT32)) { + goto RETURN_ERROR; + } + + if (0 < num_tokens) { /* tokens provided */ + tokens = (char**)malloc((num_tokens+1)*sizeof(char*)); + tokptr = tokens; + for (i=0; i ompi_unpack_string(buffer, tokptr)) { + goto RETURN_ERROR; + } + tokptr++; + } + *tokptr = NULL; + } else { /* no tokens provided - wildcard case */ + tokens = NULL; + } + + if (OMPI_SUCCESS != ompi_unpack(buffer, &id_tag, 1, MCA_GPR_OOB_PACK_NOTIFY_ID)) { + goto RETURN_ERROR; + } + + /******* LOCK *****/ + OMPI_THREAD_LOCK(&mca_gpr_replica_mutex); + + seg = 
mca_gpr_replica_find_seg(true, segment, ompi_name_server.get_jobid(ompi_rte_get_self())); + if (NULL == seg) { /* segment couldn't be found */ + OMPI_THREAD_UNLOCK(&mca_gpr_replica_mutex); + goto RETURN_ERROR; + } + + /* convert tokens to keys */ + keys = mca_gpr_replica_get_key_list(seg, tokens, &num_keys); + + if (NULL != sender) { /* remote sender */ + + /* enter request on local notify tracking system */ + local_idtag1 = mca_gpr_replica_enter_notify_request(seg, action, sender, id_tag, NULL, NULL); + + response = (int32_t)mca_gpr_replica_subscribe_nl(mode, action, seg, keys, num_keys, + local_idtag1); + if (OMPI_SUCCESS == response) { + return_tag = local_idtag1; + } + + } else { /* local sender - id_tag is for local notify tracking system*/ + response = (int32_t)mca_gpr_replica_subscribe_nl(mode, action, seg, + keys, num_keys, id_tag); + if (OMPI_SUCCESS == response) { + return_tag = id_tag; + } + } + + mca_gpr_replica_check_subscriptions(seg, MCA_GPR_REPLICA_SUBSCRIBER_ADDED); + + if (NULL != keys) { + free(keys); + } + + OMPI_THREAD_UNLOCK(&mca_gpr_replica_mutex); + /****** UNLOCK ******/ + + RETURN_ERROR: + if (NULL != segment) { + free(segment); + } + if (NULL != tokens) { + tokptr = tokens; + for (i=0; i ompi_unpack_string(buffer, &segment)) { + goto RETURN_ERROR; + } + + if (OMPI_SUCCESS != ompi_unpack(buffer, &num_tokens, 1, OMPI_INT32)) { + goto RETURN_ERROR; + } + + if (0 < num_tokens) { /* tokens provided */ + tokens = (char**)malloc((num_tokens+1)*sizeof(char*)); + tokptr = tokens; + for (i=0; i ompi_unpack_string(buffer, tokptr)) { + goto RETURN_ERROR; + } + tokptr++; + } + *tokptr = NULL; + } else { /* no tokens provided - wildcard case, just count entries on segment */ + tokens = NULL; + } + + if (OMPI_SUCCESS != ompi_unpack(buffer, &trigger, 1, OMPI_INT32)) { + goto RETURN_ERROR; + } + + if (OMPI_SUCCESS != ompi_unpack(buffer, &id_tag, 1, MCA_GPR_OOB_PACK_NOTIFY_ID)) { + goto RETURN_ERROR; + } + + + /******* LOCK *****/ + 
OMPI_THREAD_LOCK(&mca_gpr_replica_mutex); + + seg = mca_gpr_replica_find_seg(true, segment, ompi_name_server.get_jobid(ompi_rte_get_self())); + if (NULL == seg) { /* segment couldn't be found */ + OMPI_THREAD_UNLOCK(&mca_gpr_replica_mutex); + goto RETURN_ERROR; + } + + /* convert tokens to keys */ + keys = mca_gpr_replica_get_key_list(seg, tokens, &num_keys); + + if (NULL != sender) { /* remote sender */ + + /* enter request on local notify tracking system */ + local_idtag1 = mca_gpr_replica_enter_notify_request(seg, + OMPI_REGISTRY_NOTIFY_NONE, sender, + id_tag, NULL, NULL); + + response = (int32_t)mca_gpr_replica_synchro_nl(synchro_mode, + mode, seg, keys, num_keys, + trigger, local_idtag1); + + if (OMPI_SUCCESS == response) { + return_tag = local_idtag1; + } + + } else { /* local sender - id_tag already on local notify tracking system */ + response = (int32_t)mca_gpr_replica_synchro_nl(synchro_mode, + mode, seg, keys, num_keys, + trigger, id_tag); + if (OMPI_SUCCESS == response) { + return_tag = id_tag; + } + } + + mca_gpr_replica_check_synchros(seg); + + if (NULL != keys) { + free(keys); + } + + OMPI_THREAD_UNLOCK(&mca_gpr_replica_mutex); + /****** UNLOCK ******/ + + RETURN_ERROR: + if (NULL != segment) { + free(segment); + } + if (NULL != tokens) { + tokptr = tokens; + for (i=0; iname, 1, OMPI_NAME); + OBJ_RELEASE(recip); + } + + ompi_buffer_get(msg, &addr, &size); + + ompi_pack(answer, &size, 1, OMPI_INT32); + ompi_pack(answer, &addr, size, OMPI_BYTE); + +} + + +static void mca_gpr_replica_recv_get_shutdown_msg_cmd(ompi_buffer_t buffer, ompi_buffer_t answer) +{ + char *jobidstring; + mca_ns_base_jobid_t jobid; + ompi_list_t *recipients; + ompi_buffer_t msg; + ompi_name_server_namelist_t *recip; + void *addr; + int32_t size, num_recipients, i; + + if (OMPI_SUCCESS != ompi_unpack_string(buffer, &jobidstring)) { + return; + } + + jobid = ompi_name_server.convert_string_to_jobid(jobidstring); + + OMPI_THREAD_LOCK(&mca_gpr_replica_mutex); + + msg = 
mca_gpr_replica_construct_startup_shutdown_msg_nl(OMPI_SHUTDOWN_DETECTED, jobid, recipients); + + OMPI_THREAD_UNLOCK(&mca_gpr_replica_mutex); + + num_recipients = (int32_t)ompi_list_get_size(recipients); + if (OMPI_SUCCESS != ompi_pack(answer, &num_recipients, 1, OMPI_INT32)) { + return; + } + + for (i=0; iname, 1, OMPI_NAME); + OBJ_RELEASE(recip); + } + + ompi_buffer_get(msg, &addr, &size); + + ompi_pack(answer, &size, 1, OMPI_INT32); + ompi_pack(answer, &addr, size, OMPI_BYTE); +} + +static void mca_gpr_replica_recv_triggers_active_cmd(ompi_buffer_t cmd) +{ + mca_ns_base_jobid_t jobid; + + if (OMPI_SUCCESS != ompi_unpack(cmd, &jobid, 1, MCA_GPR_OOB_PACK_JOBID)) { + return; + } + + OMPI_THREAD_LOCK(&mca_gpr_replica_mutex); + mca_gpr_replica_triggers_active_nl(jobid); + OMPI_THREAD_UNLOCK(&mca_gpr_replica_mutex); +} + +static void mca_gpr_replica_recv_triggers_inactive_cmd(ompi_buffer_t cmd) +{ + mca_ns_base_jobid_t jobid; + + if (OMPI_SUCCESS != ompi_unpack(cmd, &jobid, 1, MCA_GPR_OOB_PACK_JOBID)) { + return; + } + + OMPI_THREAD_LOCK(&mca_gpr_replica_mutex); + mca_gpr_replica_triggers_inactive_nl(jobid); + OMPI_THREAD_UNLOCK(&mca_gpr_replica_mutex); +} + +static void mca_gpr_replica_recv_cleanup_job_cmd(ompi_buffer_t cmd) +{ + mca_ns_base_jobid_t jobid; + + if (OMPI_SUCCESS != ompi_unpack(cmd, &jobid, 1, MCA_GPR_OOB_PACK_JOBID)) { + return; + } + + OMPI_THREAD_LOCK(&mca_gpr_replica_mutex); + mca_gpr_replica_cleanup_job_nl(jobid); + OMPI_THREAD_UNLOCK(&mca_gpr_replica_mutex); +} + + +static void mca_gpr_replica_recv_cleanup_proc_cmd(ompi_buffer_t cmd) +{ + ompi_process_name_t proc; + bool purge; + int8_t tmp; + + if (OMPI_SUCCESS != ompi_unpack(cmd, &tmp, 1, MCA_GPR_OOB_PACK_BOOL)) { + return; + } + purge = (bool)tmp; + + if (OMPI_SUCCESS != ompi_unpack(cmd, &proc, 1, MCA_GPR_OOB_PACK_NAME)) { + return; + } + + OMPI_THREAD_LOCK(&mca_gpr_replica_mutex); + mca_gpr_replica_cleanup_proc_nl(purge, &proc); + OMPI_THREAD_UNLOCK(&mca_gpr_replica_mutex); +} + + +static void 
mca_gpr_replica_recv_notify_on_cmd(ompi_buffer_t cmd) +{ + ompi_process_name_t proc; + ompi_registry_notify_id_t sub_number; + + if (OMPI_SUCCESS != ompi_unpack(cmd, &proc, 1, MCA_GPR_OOB_PACK_NAME)) { + return; + } + + if (OMPI_SUCCESS != ompi_unpack(cmd, &sub_number, 1, MCA_GPR_OOB_PACK_NOTIFY_ID)) { + return; + } + + OMPI_THREAD_LOCK(&mca_gpr_replica_mutex); + mca_gpr_replica_notify_on_nl(&proc, sub_number); + OMPI_THREAD_UNLOCK(&mca_gpr_replica_mutex); +} + +static void mca_gpr_replica_recv_notify_off_cmd(ompi_buffer_t cmd) +{ + ompi_process_name_t proc; + ompi_registry_notify_id_t sub_number; + + if (OMPI_SUCCESS != ompi_unpack(cmd, &proc, 1, MCA_GPR_OOB_PACK_NAME)) { + return; + } + + if (OMPI_SUCCESS != ompi_unpack(cmd, &sub_number, 1, MCA_GPR_OOB_PACK_NOTIFY_ID)) { + return; + } + + OMPI_THREAD_LOCK(&mca_gpr_replica_mutex); + mca_gpr_replica_notify_off_nl(&proc, sub_number); + OMPI_THREAD_UNLOCK(&mca_gpr_replica_mutex); +} + + +static int32_t mca_gpr_replica_recv_assume_ownership_cmd(ompi_buffer_t cmd) +{ + mca_ns_base_jobid_t jobid; + mca_gpr_replica_segment_t *seg; + char *segment; + int32_t rc; + + if (OMPI_SUCCESS != ompi_unpack(cmd, &jobid, 1, MCA_GPR_OOB_PACK_JOBID)) { + return OMPI_ERROR; + } + + if (0 > ompi_unpack_string(cmd, &segment)) { + return OMPI_ERROR; + } + + OMPI_THREAD_LOCK(&mca_gpr_replica_mutex); + + /* find the segment */ + seg = mca_gpr_replica_find_seg(true, segment, jobid); + if (NULL == seg) { /* segment couldn't be found or created */ + OMPI_THREAD_UNLOCK(&mca_gpr_replica_mutex); + return OMPI_ERROR; + } + + rc = (int32_t)mca_gpr_replica_assume_ownership_nl(seg, jobid); + + OMPI_THREAD_UNLOCK(&mca_gpr_replica_mutex); + + return rc; +} + + + +static bool mca_gpr_replica_recv_silent_mode(ompi_buffer_t buffer) +{ + int8_t tmp; + + if (OMPI_SUCCESS != ompi_unpack(buffer, &tmp, 1, MCA_GPR_OOB_PACK_BOOL)) { + return false; + } + + return (bool)tmp; +} diff --git a/src/mca/gpr/replica/gpr_replica_subscribe.c 
b/src/mca/gpr/replica/gpr_replica_subscribe.c new file mode 100644 index 0000000000..b4817c690c --- /dev/null +++ b/src/mca/gpr/replica/gpr_replica_subscribe.c @@ -0,0 +1,155 @@ +/* + * $HEADER$ + */ +/** @file: + * + * The Open MPI general purpose registry - implementation. + * + */ + +/* + * includes + */ + +#include "ompi_config.h" + +#include "gpr_replica.h" +#include "gpr_replica_internals.h" + +ompi_registry_notify_id_t +mca_gpr_replica_subscribe(ompi_registry_mode_t addr_mode, + ompi_registry_notify_action_t action, + char *segment, char **tokens, + ompi_registry_notify_cb_fn_t cb_func, void *user_tag) +{ + int rc; + ompi_registry_notify_id_t local_idtag; + mca_gpr_replica_segment_t *seg; + mca_gpr_replica_key_t *keys; + int num_keys; + + /* protect against errors */ + if (NULL == segment) { + return OMPI_REGISTRY_NOTIFY_ID_MAX; + } + + seg = mca_gpr_replica_find_seg(true, segment, ompi_name_server.get_jobid(ompi_rte_get_self())); + if (NULL == seg) { /* segment couldn't be found or created */ + return OMPI_REGISTRY_NOTIFY_ID_MAX; + } + + if (mca_gpr_replica_compound_cmd_mode) { + + mca_gpr_base_pack_subscribe(mca_gpr_replica_compound_cmd, + addr_mode, action, + segment, tokens); + + OMPI_THREAD_LOCK(&mca_gpr_replica_mutex); + + /* enter request on notify tracking system */ + local_idtag = mca_gpr_replica_enter_notify_request(seg, action, NULL, 0, cb_func, user_tag); + + OMPI_THREAD_UNLOCK(&mca_gpr_replica_mutex); + + ompi_pack(mca_gpr_replica_compound_cmd, &local_idtag, 1, OMPI_INT32); + + return local_idtag; + } + + OMPI_THREAD_LOCK(&mca_gpr_replica_mutex); + + /* enter request on notify tracking system */ + local_idtag = mca_gpr_replica_enter_notify_request(seg, action, NULL, 0, cb_func, user_tag); + + /* convert tokens to keys */ + keys = mca_gpr_replica_get_key_list(seg, tokens, &num_keys); + + /* register subscription */ + rc = mca_gpr_replica_subscribe_nl(addr_mode, action, seg, + keys, num_keys, local_idtag); + + /* check subscriptions */ + 
mca_gpr_replica_check_subscriptions(seg, MCA_GPR_REPLICA_SUBSCRIBER_ADDED); + + if (NULL != keys) { + free(keys); + } + + OMPI_THREAD_UNLOCK(&mca_gpr_replica_mutex); + + mca_gpr_replica_process_callbacks(); + + return local_idtag; +} + + +int mca_gpr_replica_subscribe_nl(ompi_registry_mode_t addr_mode, + ompi_registry_notify_action_t action, + mca_gpr_replica_segment_t *seg, + mca_gpr_replica_key_t *keys, + int num_keys, + ompi_registry_notify_id_t id_tag) +{ + mca_gpr_replica_trigger_list_t *trig; + ompi_registry_notify_message_t *notify_msg; + ; + + if (mca_gpr_replica_debug) { + ompi_output(0, "[%d,%d,%d] gpr replica: subscribe entered: segment %s", + OMPI_NAME_ARGS(*ompi_rte_get_self()), seg->name); + } + + /* construct the trigger */ + if (NULL != (trig = mca_gpr_replica_construct_trigger(OMPI_REGISTRY_SYNCHRO_MODE_NONE, action, + addr_mode, seg, keys, num_keys, + 0, id_tag))) { + + if ((OMPI_REGISTRY_NOTIFY_PRE_EXISTING & action) && seg->triggers_active) { /* want list of everything there */ + notify_msg = mca_gpr_replica_construct_notify_message(seg, trig); + notify_msg->trig_action = action; + notify_msg->trig_synchro = OMPI_REGISTRY_SYNCHRO_MODE_NONE; + mca_gpr_replica_process_triggers(seg, trig, notify_msg); + } + return OMPI_SUCCESS; + } else { + return OMPI_ERROR; + } +} + + +int mca_gpr_replica_unsubscribe(ompi_registry_notify_id_t sub_number) +{ + uint rc; + + if (mca_gpr_replica_compound_cmd_mode) { + return mca_gpr_base_pack_unsubscribe(mca_gpr_replica_compound_cmd, + mca_gpr_replica_silent_mode, sub_number); + } + + OMPI_THREAD_LOCK(&mca_gpr_replica_mutex); + + rc = mca_gpr_replica_unsubscribe_nl(sub_number); + + OMPI_THREAD_UNLOCK(&mca_gpr_replica_mutex); + + if (OMPI_REGISTRY_NOTIFY_ID_MAX == rc) { + return OMPI_ERROR; + } + + return OMPI_SUCCESS; +} + + +ompi_registry_notify_id_t +mca_gpr_replica_unsubscribe_nl(ompi_registry_notify_id_t sub_number) +{ + + if (mca_gpr_replica_debug) { + ompi_output(0, "[%d,%d,%d] gpr replica: unsubscribe entered 
for sub number %d", + OMPI_NAME_ARGS(*ompi_rte_get_self()), sub_number); + } + + /* find trigger on replica and remove it - return requestor's id_tag */ + return mca_gpr_replica_remove_trigger(sub_number); + +} diff --git a/src/mca/gpr/replica/gpr_replica_synchro.c b/src/mca/gpr/replica/gpr_replica_synchro.c new file mode 100644 index 0000000000..fc9fb3f424 --- /dev/null +++ b/src/mca/gpr/replica/gpr_replica_synchro.c @@ -0,0 +1,149 @@ +/* + * $HEADER$ + */ +/** @file: + * + * The Open MPI general purpose registry - implementation. + * + */ + +/* + * includes + */ + +#include "ompi_config.h" + +#include "gpr_replica.h" +#include "gpr_replica_internals.h" + +ompi_registry_notify_id_t +mca_gpr_replica_synchro(ompi_registry_synchro_mode_t synchro_mode, + ompi_registry_mode_t addr_mode, + char *segment, char **tokens, int trigger, + ompi_registry_notify_cb_fn_t cb_func, void *user_tag) +{ + int rc; + ompi_registry_notify_id_t local_idtag; + mca_gpr_replica_segment_t *seg; + mca_gpr_replica_key_t *keys; + int num_keys; + + /* protect against errors */ + if (NULL == segment) { + return OMPI_REGISTRY_NOTIFY_ID_MAX; + } + + seg = mca_gpr_replica_find_seg(true, segment, ompi_name_server.get_jobid(ompi_rte_get_self())); + if (NULL == seg) { /* segment couldn't be found */ + return OMPI_REGISTRY_NOTIFY_ID_MAX; + } + + + if (mca_gpr_replica_compound_cmd_mode) { + mca_gpr_base_pack_synchro(mca_gpr_replica_compound_cmd, + synchro_mode, addr_mode, + segment, tokens, trigger); + + OMPI_THREAD_LOCK(&mca_gpr_replica_mutex); + + /* enter request on notify tracking system */ + local_idtag = mca_gpr_replica_enter_notify_request(seg, OMPI_REGISTRY_NOTIFY_NONE, + NULL, 0, cb_func, user_tag); + + OMPI_THREAD_UNLOCK(&mca_gpr_replica_mutex); + + ompi_pack(mca_gpr_replica_compound_cmd, &local_idtag, 1, OMPI_INT32); + + return local_idtag; + } + + OMPI_THREAD_LOCK(&mca_gpr_replica_mutex); + + /* enter request on notify tracking system */ + local_idtag = 
mca_gpr_replica_enter_notify_request(seg, OMPI_REGISTRY_NOTIFY_NONE, + NULL, 0, cb_func, user_tag); + + /* convert tokens to keys */ + keys = mca_gpr_replica_get_key_list(seg, tokens, &num_keys); + + /* process synchro request */ + rc = mca_gpr_replica_synchro_nl(synchro_mode, addr_mode, + seg, keys, num_keys, trigger, local_idtag); + + mca_gpr_replica_check_synchros(seg); + + if (NULL != keys) { + free(keys); + } + + OMPI_THREAD_UNLOCK(&mca_gpr_replica_mutex); + + mca_gpr_replica_process_callbacks(); + + return local_idtag; +} + +int mca_gpr_replica_synchro_nl(ompi_registry_synchro_mode_t synchro_mode, + ompi_registry_mode_t addr_mode, + mca_gpr_replica_segment_t *seg, + mca_gpr_replica_key_t *keys, + int num_keys, + int trigger, + ompi_registry_notify_id_t id_tag) +{ + mca_gpr_replica_trigger_list_t *trig; + + if (mca_gpr_replica_debug) { + ompi_output(0, "[%d,%d,%d] gpr replica: synchro entered on segment %s trigger %d", + OMPI_NAME_ARGS(*ompi_rte_get_self()), seg->name, trigger); + } + + /* construct the trigger */ + if (NULL != (trig = mca_gpr_replica_construct_trigger(synchro_mode, + OMPI_REGISTRY_NOTIFY_NONE, + addr_mode, seg, keys, num_keys, + trigger, id_tag))) { + return OMPI_SUCCESS; + } else { + return OMPI_ERROR; + } +} + +int mca_gpr_replica_cancel_synchro(ompi_registry_notify_id_t synch_number) +{ + ompi_registry_notify_id_t rc; + + if (mca_gpr_replica_compound_cmd_mode) { + return mca_gpr_base_pack_cancel_synchro(mca_gpr_replica_compound_cmd, + mca_gpr_replica_silent_mode, + synch_number); + } + + OMPI_THREAD_LOCK(&mca_gpr_replica_mutex); + + rc = mca_gpr_replica_cancel_synchro_nl(synch_number); + + OMPI_THREAD_UNLOCK(&mca_gpr_replica_mutex); + + if (OMPI_REGISTRY_NOTIFY_ID_MAX == rc) { + return OMPI_ERROR; + } + + return OMPI_SUCCESS; +} + +ompi_registry_notify_id_t +mca_gpr_replica_cancel_synchro_nl(ompi_registry_notify_id_t synch_number) +{ + + if (mca_gpr_replica_debug) { + ompi_output(0, "[%d,%d,%d] gpr replica: cancel_synchro entered for synch 
%d", + OMPI_NAME_ARGS(*ompi_rte_get_self()), synch_number); + } + + /* find trigger on replica and remove it - return requestor's id_tag */ + return mca_gpr_replica_remove_trigger(synch_number); + +} + + diff --git a/src/mca/gpr/replica/gpr_replica_test_internals.c b/src/mca/gpr/replica/gpr_replica_test_internals.c new file mode 100644 index 0000000000..f4c8adfdd3 --- /dev/null +++ b/src/mca/gpr/replica/gpr_replica_test_internals.c @@ -0,0 +1,237 @@ +/* + * $HEADER$ + */ +/** @file: + * + * The Open MPI general purpose registry - support functions. + * + */ + +/* + * includes + */ + +#include "ompi_config.h" + +#include "gpr_replica.h" +#include "gpr_replica_internals.h" + + +ompi_list_t *mca_gpr_replica_test_internals(int level) +{ + ompi_list_t *test_results; + ompi_registry_internal_test_results_t *result; + char name[30], name2[30]; + char *name3[30]; + int i, j, num_keys; + mca_gpr_replica_key_t segkey, key, *keys; + mca_gpr_replica_segment_t *seg; + mca_gpr_replica_keytable_t *dict_entry; + bool success; + mca_ns_base_jobid_t test_jobid; + + + test_results = OBJ_NEW(ompi_list_t); + + if (mca_gpr_replica_compound_cmd_mode) { + mca_gpr_base_pack_test_internals(mca_gpr_replica_compound_cmd, level); + return NULL; + } + + ompi_output(0, "testing define segment"); + /* create several test segments */ + success = true; + result = OBJ_NEW(ompi_registry_internal_test_results_t); + result->test = strdup("test-define-segment"); + for (i=0; i<5 && success; i++) { + sprintf(name, "test-def-seg%d", i); + if (NULL == mca_gpr_replica_define_segment(name, test_jobid)) { + success = false; + } + } + if (success) { + result->message = strdup("success"); + } else { + result->message = strdup("failed"); + } + ompi_list_append(test_results, &result->item); + + ompi_output(0, "testing get key for segment "); + /* check ability to get key for a segment */ + success = true; + result = OBJ_NEW(ompi_registry_internal_test_results_t); + result->test = strdup("test-get-seg-key"); + for 
(i=0; i<5 && success; i++) { + sprintf(name, "test-def-seg%d", i); + key = mca_gpr_replica_get_key(NULL, name); + if (MCA_GPR_REPLICA_KEY_MAX == key) { /* got an error */ + success = false; + } + } + if (success) { + result->message = strdup("success"); + } else { + result->message = strdup("failed"); + } + ompi_list_append(test_results, &result->item); + + ompi_output(0, "testing define key"); + /* check that define key protects uniqueness */ + success = true; + result = OBJ_NEW(ompi_registry_internal_test_results_t); + result->test = strdup("test-define-key-uniqueness"); + for (i=0; i<5 && success; i++) { + sprintf(name, "test-def-seg%d", i); + segkey = mca_gpr_replica_get_key(NULL, name); + key = mca_gpr_replica_define_key(NULL, name); + if (segkey != key) { /* got an error */ + success = false; + } + } + if (success) { + result->message = strdup("success"); + } else { + result->message = strdup("failed"); + } + ompi_list_append(test_results, &result->item); + + ompi_output(0, "testing find segment"); + /* check the ability to find a segment */ + i = 2; + sprintf(name, "test-def-seg%d", i); + result = OBJ_NEW(ompi_registry_internal_test_results_t); + result->test = strdup("test-find-seg"); + seg = mca_gpr_replica_find_seg(false, name, test_jobid); + if (NULL == seg) { + asprintf(&result->message, "test failed with NULL returned: %s", name); + } else { /* locate key and check it */ + segkey = mca_gpr_replica_get_key(NULL, name); + if (segkey == seg->key) { + result->message = strdup("success"); + } else { + asprintf(&result->message, "test failed: key %d seg %d", segkey, seg->key); + } + } + ompi_list_append(test_results, &result->item); + + ompi_output(0, "testing define key within segment"); + /* check ability to define key within a segment */ + success = true; + result = OBJ_NEW(ompi_registry_internal_test_results_t); + result->test = strdup("test-define-key-segment"); + for (i=0; i<5 && success; i++) { + sprintf(name, "test-def-seg%d", i); + seg = 
mca_gpr_replica_find_seg(false, name, test_jobid); + for (j=0; j<10 && success; j++) { + sprintf(name2, "test-key%d", j); + key = mca_gpr_replica_define_key(seg, name2); + if (MCA_GPR_REPLICA_KEY_MAX == key) { /* got an error */ + success = false; + } + } + } + if (success) { + result->message = strdup("success"); + } else { + result->message = strdup("failed"); + } + ompi_list_append(test_results, &result->item); + + + ompi_output(0, "testing get key within segment"); + /* check ability to retrieve key within a segment */ + success = true; + result = OBJ_NEW(ompi_registry_internal_test_results_t); + result->test = strdup("test-get-key-segment"); + for (i=0; i<5 && success; i++) { + sprintf(name, "test-def-seg%d", i); + seg = mca_gpr_replica_find_seg(false, name, test_jobid); + for (j=0; j<10 && success; j++) { + sprintf(name2, "test-key%d", j); + key = mca_gpr_replica_get_key(seg, name2); + if (MCA_GPR_REPLICA_KEY_MAX == key) { /* got an error */ + success = false; + } + } + } + if (success) { + result->message = strdup("success"); + } else { + result->message = strdup("failed"); + } + ompi_list_append(test_results, &result->item); + + + ompi_output(0, "testing get dict entry - global"); + /* check ability to get dictionary entries */ + success = true; + result = OBJ_NEW(ompi_registry_internal_test_results_t); + result->test = strdup("test-get-dict-entry"); + /* first check ability to get segment values */ + for (i=0; i<5 && success; i++) { + sprintf(name, "test-def-seg%d", i); + dict_entry = mca_gpr_replica_find_dict_entry(NULL, name); + if (NULL == dict_entry) { /* got an error */ + success = false; + } + } + if (success) { + result->message = strdup("success"); + } else { + result->message = strdup("failed"); + } + ompi_list_append(test_results, &result->item); + + ompi_output(0, "testing get dict entry - segment"); + if (success) { /* segment values checked out - move on to within a segment */ + result = OBJ_NEW(ompi_registry_internal_test_results_t); + 
result->test = strdup("test-get-dict-entry-segment"); + for (i=0; i<5; i++) { + sprintf(name, "test-def-seg%d", i); + seg = mca_gpr_replica_find_seg(false, name, test_jobid); + for (j=0; j<10; j++) { + sprintf(name2, "test-key%d", j); + dict_entry = mca_gpr_replica_find_dict_entry(seg, name2); + if (NULL == dict_entry) { /* got an error */ + success = false; + } + } + } + if (success) { + result->message = strdup("success"); + } else { + result->message = strdup("failed"); + } + ompi_list_append(test_results, &result->item); + } + + + ompi_output(0, "testing get key list"); + /* check ability to get key list */ + success = true; + result = OBJ_NEW(ompi_registry_internal_test_results_t); + result->test = strdup("test-get-keylist"); + for (i=0; i<5 && success; i++) { + sprintf(name, "test-def-seg%d", i); + seg = mca_gpr_replica_find_seg(false, name, test_jobid); + for (j=0; j<10 && success; j++) { + asprintf(&name3[j], "test-key%d", j); + } + name3[j] = NULL; + keys = mca_gpr_replica_get_key_list(seg, name3, &num_keys); + if (0 >= num_keys) { /* error condition */ + success = false; + } + } + if (success) { + result->message = strdup("success"); + } else { + result->message = strdup("failed"); + } + ompi_list_append(test_results, &result->item); + + /* check ability to empty segment */ + + + return test_results; +} diff --git a/src/mca/gpr/replica/gpr_replica_xmit_alerts.c b/src/mca/gpr/replica/gpr_replica_xmit_alerts.c new file mode 100644 index 0000000000..a22fc2e33d --- /dev/null +++ b/src/mca/gpr/replica/gpr_replica_xmit_alerts.c @@ -0,0 +1,323 @@ +/* -*- C -*- + * + * $HEADER$ + */ +/** @file: + * + * The Open MPI General Purpose Registry - Replica component + * + */ + +/* + * includes + */ +#include "ompi_config.h" + +#include "gpr_replica.h" +#include "gpr_replica_internals.h" + + +void mca_gpr_replica_process_callbacks(void) +{ + mca_gpr_replica_callbacks_t *cb; + + /* aggregate messages for identical recipient - local messages just get called */ + + /* send 
messages to de-aggregator - that function unpacks them and issues callbacks */ + if (mca_gpr_replica_debug) { + ompi_output(0, "gpr replica: process_callbacks entered"); + } + + + while (NULL != (cb = (mca_gpr_replica_callbacks_t*)ompi_list_remove_first(&mca_gpr_replica_callbacks))) { + if (NULL == cb->requestor) { /* local callback */ + if (mca_gpr_replica_debug) { + ompi_output(0, "process_callbacks: local"); + } + cb->cb_func(cb->message, cb->user_tag); + } else { /* remote request - send message back */ + if (mca_gpr_replica_debug) { + ompi_output(0, "process_callbacks: remote to [%d,%d,%d]", cb->requestor->cellid, + cb->requestor->jobid, cb->requestor->vpid); + } + mca_gpr_replica_remote_notify(cb->requestor, cb->remote_idtag, cb->message); + } + OBJ_RELEASE(cb); + } +} + + +ompi_buffer_t mca_gpr_replica_get_startup_msg(mca_ns_base_jobid_t jobid, + ompi_list_t *recipients) +{ + ompi_buffer_t msg; + + if (mca_gpr_replica_debug) { + ompi_output(0, "[%d,%d,%d] entered get_startup_msg", + OMPI_NAME_ARGS(*ompi_rte_get_self())); + } + + OMPI_THREAD_LOCK(&mca_gpr_replica_mutex); + + msg = mca_gpr_replica_construct_startup_shutdown_msg_nl(OMPI_STARTUP_DETECTED, jobid, recipients); + + OMPI_THREAD_UNLOCK(&mca_gpr_replica_mutex); + + return msg; +} + + +ompi_buffer_t mca_gpr_replica_get_shutdown_msg(mca_ns_base_jobid_t jobid, + ompi_list_t *recipients) +{ + ompi_buffer_t msg; + + if (mca_gpr_replica_debug) { + ompi_output(0, "[%d,%d,%d] entered get_shutdown_msg", + OMPI_NAME_ARGS(*ompi_rte_get_self())); + } + + OMPI_THREAD_LOCK(&mca_gpr_replica_mutex); + + msg = mca_gpr_replica_construct_startup_shutdown_msg_nl(OMPI_SHUTDOWN_DETECTED, jobid, recipients); + + OMPI_THREAD_UNLOCK(&mca_gpr_replica_mutex); + + return msg; +} + + +ompi_buffer_t +mca_gpr_replica_construct_startup_shutdown_msg_nl(int mode, + mca_ns_base_jobid_t jobid, + ompi_list_t *recipients) +{ + mca_gpr_replica_segment_t *seg=NULL, *proc_stat_seg; + mca_gpr_replica_key_t *keys; + int num_keys; + 
mca_gpr_replica_core_t *reg=NULL; + mca_gpr_replica_trigger_list_t *trig, *next_trig; + mca_gpr_replica_notify_request_tracker_t *trackptr; + ompi_name_server_namelist_t *peer, *ptr; + ompi_rte_process_status_t *proc_status; + char *segment, *tokens[2]; + int32_t size; + ompi_buffer_t msg; + ompi_list_t *returned_list; + ompi_registry_value_t *value; + bool found, include_data, done; + size_t bufsize; + + if (mca_gpr_replica_debug) { + ompi_output(0, "[%d,%d,%d] entered construct_startup_shutdown_msg for job %d", + OMPI_NAME_ARGS(*ompi_rte_get_self()), (int)jobid); + } + + if (OMPI_SUCCESS != ompi_buffer_init(&msg, 0)) { + return NULL; + } + + /* setup tokens and segments for this job */ + asprintf(&segment, "%s-%s", OMPI_RTE_JOB_STATUS_SEGMENT, + ompi_name_server.convert_jobid_to_string(jobid)); + + /* find the specified segment */ + proc_stat_seg = mca_gpr_replica_find_seg(false, segment, MCA_NS_BASE_JOBID_MAX); + + /* traverse the registry's segments */ + for (seg = (mca_gpr_replica_segment_t*)ompi_list_get_first(&mca_gpr_replica_head.registry); + seg != (mca_gpr_replica_segment_t*)ompi_list_get_end(&mca_gpr_replica_head.registry); + seg = (mca_gpr_replica_segment_t*)ompi_list_get_next(seg)) { + + if (mca_gpr_replica_debug) { + ompi_output(0, "[%d,%d,%d] construct_ss_msg: checking segment %s owned by %d", + OMPI_NAME_ARGS(*ompi_rte_get_self()), seg->name, (int)seg->owning_job); + } + + if (jobid == seg->owning_job) { /* this segment is part of the specified jobid */ + + ompi_pack_string(msg, seg->name); /* pack the segment name */ + include_data = false; + + /* construct the list of recipients and find out if data is desired */ + for (trig = (mca_gpr_replica_trigger_list_t*)ompi_list_get_first(&seg->triggers); + trig != (mca_gpr_replica_trigger_list_t*)ompi_list_get_end(&seg->triggers); + ) { + next_trig = (mca_gpr_replica_trigger_list_t*)ompi_list_get_next(trig); + + if (((OMPI_REGISTRY_NOTIFY_ON_STARTUP & trig->action) && (OMPI_STARTUP_DETECTED == mode)) || + 
((OMPI_REGISTRY_NOTIFY_ON_SHUTDOWN & trig->action) && (OMPI_SHUTDOWN_DETECTED == mode))) { + + /* see if data is requested - only one trig has to ask for it */ + if (((OMPI_REGISTRY_NOTIFY_INCLUDE_STARTUP_DATA & trig->action) && (OMPI_STARTUP_DETECTED == mode)) || + ((OMPI_REGISTRY_NOTIFY_INCLUDE_SHUTDOWN_DATA & trig->action) && (OMPI_SHUTDOWN_DETECTED == mode))) { + include_data = true; + } + + /***** if notify_one_shot is set, need to remove subscription from system */ + + /* find subscription on notify tracker */ + done = false; + for (trackptr = (mca_gpr_replica_notify_request_tracker_t*)ompi_list_get_first(&mca_gpr_replica_notify_request_tracker); + trackptr != (mca_gpr_replica_notify_request_tracker_t*)ompi_list_get_end(&mca_gpr_replica_notify_request_tracker) + && !done; + trackptr = (mca_gpr_replica_notify_request_tracker_t*)ompi_list_get_next(trackptr)) { + if (trackptr->local_idtag == trig->local_idtag) { + done = true; + if (NULL != trackptr->requestor) { + /* see if process already on list of recipients */ + found = false; + for (ptr = (ompi_name_server_namelist_t*)ompi_list_get_first(recipients); + ptr != (ompi_name_server_namelist_t*)ompi_list_get_end(recipients) && !found; + ptr = (ompi_name_server_namelist_t*)ompi_list_get_next(ptr)) { + if (0 == ompi_name_server.compare(OMPI_NS_CMP_ALL, trackptr->requestor, ptr->name)) { + found = true; + } + } + + if (!found) { + /* check job status segment to verify recipient still alive */ + tokens[0] = ompi_name_server.get_proc_name_string(trackptr->requestor); + tokens[1] = NULL; + + /* convert tokens to array of keys */ + keys = mca_gpr_replica_get_key_list(proc_stat_seg, tokens, &num_keys); + + returned_list = mca_gpr_replica_get_nl(OMPI_REGISTRY_XAND, proc_stat_seg, keys, num_keys); + + free(tokens[0]); + free(keys); + + if (NULL != (value = (ompi_registry_value_t*)ompi_list_remove_first(returned_list))) { + proc_status = ompi_rte_unpack_process_status(value); + if ((OMPI_PROC_KILLED != 
proc_status->status_key) && + (OMPI_PROC_STOPPED != proc_status->status_key)) { + /* add process to list of recipients */ + peer = OBJ_NEW(ompi_name_server_namelist_t); + peer->name = ompi_name_server.copy_process_name(trackptr->requestor); + ompi_list_append(recipients, &peer->item); + } + } + } + } + } + } + } + trig = next_trig; + } + + if (include_data) { /* add in the data from all the registry entries on this segment */ + + size = (int32_t)ompi_list_get_size(&seg->registry_entries); /* and number of data objects */ + ompi_pack(msg, &size, 1, OMPI_INT32); + + + for (reg = (mca_gpr_replica_core_t*)ompi_list_get_first(&seg->registry_entries); + reg != (mca_gpr_replica_core_t*)ompi_list_get_end(&seg->registry_entries); + reg = (mca_gpr_replica_core_t*)ompi_list_get_next(reg)) { + + /* add info to msg payload */ + size = (int32_t)reg->object_size; + ompi_pack(msg, &size, 1, MCA_GPR_OOB_PACK_OBJECT_SIZE); + ompi_pack(msg, reg->object, reg->object_size, OMPI_BYTE); + } + } else { + size = 0; + ompi_pack(msg, &size, 1, OMPI_INT32); + } + } + + if (mca_gpr_replica_debug) { + ompi_buffer_size(msg, &bufsize); + ompi_output(0, "[%d,%d,%d] built startup_shutdown_msg of length %d with %d recipients", + OMPI_NAME_ARGS(*ompi_rte_get_self()), bufsize, (int)ompi_list_get_size(recipients)); + } + } + + return msg; +} + + +void mca_gpr_replica_remote_notify(ompi_process_name_t *recipient, int recipient_tag, + ompi_registry_notify_message_t *message) +{ + ompi_buffer_t msg; + mca_gpr_cmd_flag_t command; + int32_t num_items; + uint i; + ompi_registry_value_t *regval; + char **tokptr; + int recv_tag; + + if (mca_gpr_replica_debug) { + ompi_output(0, "sending trigger message"); + } + + command = MCA_GPR_NOTIFY_CMD; + recv_tag = MCA_OOB_TAG_GPR_NOTIFY; + + if (OMPI_SUCCESS != ompi_buffer_init(&msg, 0)) { + return; + } + + if (OMPI_SUCCESS != ompi_pack(msg, &command, 1, MCA_GPR_OOB_PACK_CMD)) { + return; + } + + if (0 > ompi_pack_string(msg, message->segment)) { + return; + } + + i = 
(int32_t)message->owning_job; + if (OMPI_SUCCESS != ompi_pack(msg, &i, 1, OMPI_INT32)) { + return; + } + + i = (int32_t)recipient_tag; + if (OMPI_SUCCESS != ompi_pack(msg, &i, 1, OMPI_INT32)) { + return; + } + + if (OMPI_SUCCESS != ompi_pack(msg, &message->trig_action, 1, MCA_GPR_OOB_PACK_ACTION)) { + return; + } + + if (OMPI_SUCCESS != ompi_pack(msg, &message->trig_synchro, 1, MCA_GPR_OOB_PACK_SYNCHRO_MODE)) { + return; + } + + + num_items = (int32_t)ompi_list_get_size(&message->data); + if (OMPI_SUCCESS != ompi_pack(msg, &num_items, 1, OMPI_INT32)) { + return; + } + + if (0 < num_items) { /* don't send anything else back if the list is empty */ + while (NULL != (regval = (ompi_registry_value_t*)ompi_list_remove_first(&message->data))) { + if (OMPI_SUCCESS != ompi_pack(msg, ®val->object_size, 1, MCA_GPR_OOB_PACK_OBJECT_SIZE)) { + return; + } + if (OMPI_SUCCESS != ompi_pack(msg, regval->object, regval->object_size, OMPI_BYTE)) { + return; + } + /* TSW - should we add */ + /* OBJ_RELEASE(regval); */ + } + } + if (OMPI_SUCCESS != ompi_pack(msg, &message->num_tokens, 1, OMPI_INT32)) { + return; + } + + for (i=0, tokptr=message->tokens; i < (uint)message->num_tokens; i++, tokptr++) { + if (OMPI_SUCCESS != ompi_pack_string(msg, *tokptr)) { + return; + } + } + + if (0 > mca_oob_send_packed(recipient, msg, recv_tag, 0)) { + return; + } + + ompi_buffer_free(msg); + OBJ_RELEASE(message); +} diff --git a/src/mca/ns/base/base.h b/src/mca/ns/base/base.h index d6a818ecc5..979cc29d69 100644 --- a/src/mca/ns/base/base.h +++ b/src/mca/ns/base/base.h @@ -44,43 +44,53 @@ OMPI_DECLSPEC int mca_ns_base_close(void); * Base functions that are common to all implementations - can be overridden */ -OMPI_DECLSPEC int ns_base_assign_cellid_to_process(ompi_process_name_t* name); +OMPI_DECLSPEC int mca_ns_base_assign_cellid_to_process(ompi_process_name_t* name); -OMPI_DECLSPEC ompi_process_name_t* ns_base_create_process_name(mca_ns_base_cellid_t cell, +OMPI_DECLSPEC ompi_process_name_t* 
mca_ns_base_create_process_name(mca_ns_base_cellid_t cell, mca_ns_base_jobid_t job, mca_ns_base_vpid_t vpid); -OMPI_DECLSPEC ompi_process_name_t* ns_base_copy_process_name(ompi_process_name_t* name); +OMPI_DECLSPEC ompi_process_name_t* mca_ns_base_copy_process_name(ompi_process_name_t* name); -OMPI_DECLSPEC ompi_process_name_t* ns_base_convert_string_to_process_name(const char* name); +OMPI_DECLSPEC ompi_process_name_t* mca_ns_base_convert_string_to_process_name(const char* name); -OMPI_DECLSPEC char* ns_base_get_proc_name_string(const ompi_process_name_t* name); +OMPI_DECLSPEC char* mca_ns_base_get_proc_name_string(const ompi_process_name_t* name); -OMPI_DECLSPEC char* ns_base_get_vpid_string(const ompi_process_name_t* name); +OMPI_DECLSPEC char* mca_ns_base_get_vpid_string(const ompi_process_name_t* name); -OMPI_DECLSPEC char* ns_base_get_jobid_string(const ompi_process_name_t* name); +OMPI_DECLSPEC char* mca_ns_base_get_jobid_string(const ompi_process_name_t* name); -OMPI_DECLSPEC char* ns_base_convert_jobid_to_string(const mca_ns_base_jobid_t jobid); +OMPI_DECLSPEC char* mca_ns_base_convert_jobid_to_string(const mca_ns_base_jobid_t jobid); -OMPI_DECLSPEC char* ns_base_get_cellid_string(const ompi_process_name_t* name); +OMPI_DECLSPEC mca_ns_base_jobid_t mca_ns_base_convert_string_to_jobid(const char* jobid_string); -OMPI_DECLSPEC mca_ns_base_vpid_t ns_base_get_vpid(const ompi_process_name_t* name); +OMPI_DECLSPEC char* mca_ns_base_get_cellid_string(const ompi_process_name_t* name); -OMPI_DECLSPEC mca_ns_base_jobid_t ns_base_get_jobid(const ompi_process_name_t* name); +OMPI_DECLSPEC mca_ns_base_vpid_t mca_ns_base_get_vpid(const ompi_process_name_t* name); -OMPI_DECLSPEC mca_ns_base_cellid_t ns_base_get_cellid(const ompi_process_name_t* name); +OMPI_DECLSPEC mca_ns_base_jobid_t mca_ns_base_get_jobid(const ompi_process_name_t* name); -OMPI_DECLSPEC int ns_base_compare(ompi_ns_cmp_bitmask_t fields, +OMPI_DECLSPEC mca_ns_base_cellid_t mca_ns_base_get_cellid(const 
ompi_process_name_t* name); + +OMPI_DECLSPEC int mca_ns_base_compare(ompi_ns_cmp_bitmask_t fields, const ompi_process_name_t* name1, const ompi_process_name_t* name2); -OMPI_DECLSPEC mca_ns_base_cellid_t ns_base_create_cellid(void); +OMPI_DECLSPEC int mca_ns_base_pack_name(void *dest, void *src, int n); -OMPI_DECLSPEC mca_ns_base_jobid_t ns_base_create_jobid(void); +OMPI_DECLSPEC int mca_ns_base_unpack_name(void *dest, void *src, int n); -OMPI_DECLSPEC mca_ns_base_vpid_t ns_base_reserve_range(mca_ns_base_jobid_t job, mca_ns_base_vpid_t range); +OMPI_DECLSPEC int mca_ns_base_pack_jobid(void *dest, void *src, int n); -OMPI_DECLSPEC int ns_base_free_name(ompi_process_name_t* name); +OMPI_DECLSPEC int mca_ns_base_unpack_jobid(void *dest, void *src, int n); + +OMPI_DECLSPEC mca_ns_base_cellid_t mca_ns_base_create_cellid(void); + +OMPI_DECLSPEC mca_ns_base_jobid_t mca_ns_base_create_jobid(void); + +OMPI_DECLSPEC mca_ns_base_vpid_t mca_ns_base_reserve_range(mca_ns_base_jobid_t job, mca_ns_base_vpid_t range); + +OMPI_DECLSPEC int mca_ns_base_free_name(ompi_process_name_t* name); diff --git a/src/mca/ns/base/ns_base_local_fns.c b/src/mca/ns/base/ns_base_local_fns.c index a1b11884d7..c46705f0f0 100644 --- a/src/mca/ns/base/ns_base_local_fns.c +++ b/src/mca/ns/base/ns_base_local_fns.c @@ -26,14 +26,14 @@ * functions */ -int ns_base_assign_cellid_to_process(ompi_process_name_t* name) +int mca_ns_base_assign_cellid_to_process(ompi_process_name_t* name) { name->cellid = 0; return OMPI_SUCCESS; } -ompi_process_name_t* ns_base_create_process_name(mca_ns_base_cellid_t cell, +ompi_process_name_t* mca_ns_base_create_process_name(mca_ns_base_cellid_t cell, mca_ns_base_jobid_t job, mca_ns_base_vpid_t vpid) { ompi_process_name_t *newname; @@ -55,7 +55,7 @@ ompi_process_name_t* ns_base_create_process_name(mca_ns_base_cellid_t cell, return(newname); } -ompi_process_name_t* ns_base_copy_process_name(ompi_process_name_t* name) +ompi_process_name_t* 
mca_ns_base_copy_process_name(ompi_process_name_t* name) { mca_ns_base_cellid_t cell; mca_ns_base_jobid_t job; @@ -66,15 +66,15 @@ ompi_process_name_t* ns_base_copy_process_name(ompi_process_name_t* name) return NULL; } - cell = ns_base_get_cellid(name); - job = ns_base_get_jobid(name); - vpid = ns_base_get_vpid(name); + cell = mca_ns_base_get_cellid(name); + job = mca_ns_base_get_jobid(name); + vpid = mca_ns_base_get_vpid(name); - newname = ns_base_create_process_name(cell, job, vpid); + newname = mca_ns_base_create_process_name(cell, job, vpid); return newname; } -char* ns_base_get_proc_name_string(const ompi_process_name_t* name) +char* mca_ns_base_get_proc_name_string(const ompi_process_name_t* name) { char *name_string; @@ -89,7 +89,7 @@ char* ns_base_get_proc_name_string(const ompi_process_name_t* name) return(name_string); } -ompi_process_name_t* ns_base_convert_string_to_process_name(const char* name) +ompi_process_name_t* mca_ns_base_convert_string_to_process_name(const char* name) { char *temp, *token; mca_ns_base_cellid_t cell; @@ -145,7 +145,7 @@ ompi_process_name_t* ns_base_convert_string_to_process_name(const char* name) goto CLEANUP; } - return_code = ns_base_create_process_name(cell, job, vpid); + return_code = mca_ns_base_create_process_name(cell, job, vpid); CLEANUP: if (temp) { @@ -156,7 +156,7 @@ ompi_process_name_t* ns_base_convert_string_to_process_name(const char* name) } -char* ns_base_get_vpid_string(const ompi_process_name_t* name) +char* mca_ns_base_get_vpid_string(const ompi_process_name_t* name) { char *name_string; @@ -172,7 +172,7 @@ char* ns_base_get_vpid_string(const ompi_process_name_t* name) } -char* ns_base_get_jobid_string(const ompi_process_name_t* name) +char* mca_ns_base_get_jobid_string(const ompi_process_name_t* name) { char *name_string; @@ -188,7 +188,7 @@ char* ns_base_get_jobid_string(const ompi_process_name_t* name) } -char* ns_base_convert_jobid_to_string(const mca_ns_base_jobid_t jobid) +char* 
mca_ns_base_convert_jobid_to_string(const mca_ns_base_jobid_t jobid) { char *jobid_string; @@ -200,7 +200,23 @@ char* ns_base_convert_jobid_to_string(const mca_ns_base_jobid_t jobid) } -char* ns_base_get_cellid_string(const ompi_process_name_t* name) +mca_ns_base_jobid_t mca_ns_base_convert_string_to_jobid(const char* jobidstring) +{ + unsigned long int tmpint; + mca_ns_base_jobid_t jobid; + + tmpint = strtoul(jobidstring, NULL, 16); + if (MCA_NS_BASE_JOBID_MAX >= tmpint) { + jobid = (mca_ns_base_jobid_t)tmpint; + } else { + jobid = MCA_NS_BASE_JOBID_MAX; + } + + return jobid; +} + + +char* mca_ns_base_get_cellid_string(const ompi_process_name_t* name) { char *name_string; @@ -216,7 +232,7 @@ char* ns_base_get_cellid_string(const ompi_process_name_t* name) } -mca_ns_base_vpid_t ns_base_get_vpid(const ompi_process_name_t* name) +mca_ns_base_vpid_t mca_ns_base_get_vpid(const ompi_process_name_t* name) { if (NULL == name) { /* got an error */ return(MCA_NS_BASE_VPID_MAX); @@ -226,7 +242,7 @@ mca_ns_base_vpid_t ns_base_get_vpid(const ompi_process_name_t* name) } -mca_ns_base_jobid_t ns_base_get_jobid(const ompi_process_name_t* name) +mca_ns_base_jobid_t mca_ns_base_get_jobid(const ompi_process_name_t* name) { if (NULL == name) { /* got an error */ return(MCA_NS_BASE_JOBID_MAX); @@ -235,7 +251,7 @@ mca_ns_base_jobid_t ns_base_get_jobid(const ompi_process_name_t* name) return(name->jobid); } -mca_ns_base_cellid_t ns_base_get_cellid(const ompi_process_name_t* name) +mca_ns_base_cellid_t mca_ns_base_get_cellid(const ompi_process_name_t* name) { if (NULL == name) { /* got an error */ return(MCA_NS_BASE_CELLID_MAX); @@ -245,7 +261,7 @@ mca_ns_base_cellid_t ns_base_get_cellid(const ompi_process_name_t* name) } -int ns_base_compare(ompi_ns_cmp_bitmask_t fields, +int mca_ns_base_compare(ompi_ns_cmp_bitmask_t fields, const ompi_process_name_t* name1, const ompi_process_name_t* name2) { @@ -295,7 +311,79 @@ int ns_base_compare(ompi_ns_cmp_bitmask_t fields, } -int 
ns_base_free_name(ompi_process_name_t* name) +int mca_ns_base_pack_name(void *dest, void *src, int n) +{ + ompi_process_name_t *dn, *sn; + int i; + + dn = (ompi_process_name_t*) dest; + sn = (ompi_process_name_t*) src; + + for (i=0; icellid = htonl(sn->cellid); + dn->jobid = htonl(sn->jobid); + dn->vpid = htonl(sn->vpid); + dn++; sn++; + } + + return OMPI_SUCCESS; +} + + +int mca_ns_base_unpack_name(void *dest, void *src, int n) +{ + ompi_process_name_t *dn, *sn; + int i; + + dn = (ompi_process_name_t*) dest; + sn = (ompi_process_name_t*) src; + + for (i=0; icellid = ntohl(sn->cellid); + dn->jobid = ntohl(sn->jobid); + dn->vpid = ntohl(sn->vpid); + dn++; sn++; + } + + return OMPI_SUCCESS; +} + + +int mca_ns_base_pack_jobid(void *dest, void *src, int n) +{ + mca_ns_base_jobid_t *dj, *sj; + int i; + + dj = (mca_ns_base_jobid_t*) dest; + sj = (mca_ns_base_jobid_t*) src; + + for (i=0; ijob = 0; name_tracker->last_used_vpid = 0; } /* destructor - used to free any resources held by instance */ -static void ompi_name_tracker_destructor(mca_ns_replica_name_tracker_t* name_tracker) +static void mca_ns_replica_tracker_destructor(mca_ns_replica_name_tracker_t* name_tracker) { } @@ -94,8 +99,8 @@ static void ompi_name_tracker_destructor(mca_ns_replica_name_tracker_t* name_tra OBJ_CLASS_INSTANCE( mca_ns_replica_name_tracker_t, /* type name */ ompi_list_item_t, /* parent "class" name */ - ompi_name_tracker_construct, /* constructor */ - ompi_name_tracker_destructor); /* destructor */ + mca_ns_replica_tracker_construct, /* constructor */ + mca_ns_replica_tracker_destructor); /* destructor */ /* * globals needed within replica component diff --git a/src/mca/oob/base/Makefile.am b/src/mca/oob/base/Makefile.am index d20774f0b0..e7e9371a50 100644 --- a/src/mca/oob/base/Makefile.am +++ b/src/mca/oob/base/Makefile.am @@ -27,6 +27,7 @@ libmca_oob_base_la_SOURCES = \ oob_base_recv.c \ oob_base_recv_nb.c \ oob_base_send.c \ + oob_base_xcast.c \ oob_base_send_nb.c # Conditionally install 
the header files diff --git a/src/mca/oob/base/base.h b/src/mca/oob/base/base.h index 56fe471cf5..1a528e2cfb 100644 --- a/src/mca/oob/base/base.h +++ b/src/mca/oob/base/base.h @@ -28,7 +28,7 @@ OMPI_DECLSPEC extern ompi_process_name_t mca_oob_name_seed; OMPI_DECLSPEC extern ompi_process_name_t mca_oob_name_self; /** - * The wildcard for recieves from any peer. + * The wildcard for receives from any peer. */ #define MCA_OOB_NAME_ANY &mca_oob_name_any /** @@ -51,6 +51,7 @@ OMPI_DECLSPEC extern ompi_process_name_t mca_oob_name_self; /** * Service tags */ +#define MCA_OOB_TAG_XCAST -1 #define MCA_OOB_TAG_NS 1 #define MCA_OOB_TAG_GPR 2 #define MCA_OOB_TAG_GPR_NOTIFY 3 @@ -409,6 +410,24 @@ OMPI_DECLSPEC int mca_oob_recv_packed_nb( mca_oob_callback_packed_fn_t cbfunc, void* cbdata); +/** + * A "broadcast-like" function over the specified set of peers. + * @param root The process acting as the root of the broadcast. + * @param peers The list of processes receiving the broadcast (excluding root). + * @param buffer The data to broadcast - only significant at root. + * @param cbfunc Callback function on receipt of data - not significant at root. + * + * Note that the callback function is provided so that the data can be + * received and interpreted by the application prior to the broadcast + * continuing to forward data along the distribution tree. 
+ */ + +OMPI_DECLSPEC int mca_oob_xcast( + ompi_process_name_t* root, + ompi_list_t* peers, + ompi_buffer_t buffer, + mca_oob_callback_packed_fn_t cbfunc); + #if defined(c_plusplus) || defined(__cplusplus) } #endif diff --git a/src/mca/oob/base/oob_base_barrier.c b/src/mca/oob/base/oob_base_barrier.c index 9bcaff4873..80d7c8415a 100644 --- a/src/mca/oob/base/oob_base_barrier.c +++ b/src/mca/oob/base/oob_base_barrier.c @@ -19,7 +19,7 @@ int mca_oob_barrier(void) return rc; self = mca_pcmclient.pcmclient_get_self(); - iov.iov_base = &foo; + iov.iov_base = (void*)&foo; iov.iov_len = sizeof(foo); /* All non-root send & receive zero-length message. */ diff --git a/src/mca/oob/base/oob_base_init.c b/src/mca/oob/base/oob_base_init.c index cbb2fa9511..81935c25cb 100644 --- a/src/mca/oob/base/oob_base_init.c +++ b/src/mca/oob/base/oob_base_init.c @@ -59,7 +59,7 @@ int mca_oob_parse_contact_info( } *ptr = '\0'; ptr++; - proc_name = ns_base_convert_string_to_process_name(cinfo); + proc_name = mca_ns_base_convert_string_to_process_name(cinfo); *name = *proc_name; free(proc_name); @@ -182,7 +182,7 @@ int mca_oob_base_init(bool *user_threads, bool *hidden_threads) char* mca_oob_get_contact_info() { - char *proc_name = ns_base_get_proc_name_string(MCA_OOB_NAME_SELF); + char *proc_name = mca_ns_base_get_proc_name_string(MCA_OOB_NAME_SELF); char *proc_addr = mca_oob.oob_get_addr(); size_t size = strlen(proc_name) + 1 + strlen(proc_addr) + 1; char *contact_info = malloc(size); diff --git a/src/mca/oob/base/oob_base_xcast.c b/src/mca/oob/base/oob_base_xcast.c new file mode 100644 index 0000000000..dd4bed187c --- /dev/null +++ b/src/mca/oob/base/oob_base_xcast.c @@ -0,0 +1,57 @@ +#include "ompi_config.h" +#include + +#include "include/constants.h" +#include "util/proc_info.h" +#include "mca/oob/oob.h" +#include "mca/oob/base/base.h" +#include "mca/pcmclient/pcmclient.h" +#include "mca/pcmclient/base/base.h" +#include "mca/ns/base/base.h" + + +/** + * A "broadcast-like" function over 
the specified set of peers. + * @param root The process acting as the root of the broadcast. + * @param peers The list of processes receiving the broadcast (excluding root). + * @param buffer The data to broadcast - only significant at root. + * @param cbfunc Callback function on receipt of data - not significant at root. + * + * Note that the callback function is provided so that the data can be + * received and interpreted by the application prior to the broadcast + * continuing to forward data along the distribution tree. + */ + +int mca_oob_xcast( + ompi_process_name_t* root, + ompi_list_t* peers, + ompi_buffer_t buffer, + mca_oob_callback_packed_fn_t cbfunc) +{ + ompi_name_server_namelist_t *ptr; + int rc; + int tag = MCA_OOB_TAG_XCAST; + + /* check to see if I am the root process name */ + if(NULL != root && + 0 == ompi_name_server.compare(OMPI_NS_CMP_ALL, root, ompi_rte_get_self())) { + while (NULL != (ptr = (ompi_name_server_namelist_t*)ompi_list_remove_first(peers))) { + rc = mca_oob_send_packed(ptr->name, buffer, tag, 0); + if(rc < 0) { + return rc; + } + } + } else { + ompi_buffer_t rbuf; + int rc = mca_oob_recv_packed(MCA_OOB_NAME_ANY, &rbuf, &tag); + if(rc < 0) { + return rc; + } + if(cbfunc != NULL) + cbfunc(rc, root, rbuf, tag, NULL); + ompi_buffer_free(rbuf); + } + return OMPI_SUCCESS; +} + + diff --git a/src/mca/oob/cofs/Makefile.am b/src/mca/oob/cofs/Makefile.am deleted file mode 100644 index d9a1c0be02..0000000000 --- a/src/mca/oob/cofs/Makefile.am +++ /dev/null @@ -1,32 +0,0 @@ -# -# $HEADER$ -# - -# Use the top-level Makefile.options - -include $(top_ompi_srcdir)/config/Makefile.options - -SUBDIRS = src - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). 
- -if OMPI_BUILD_oob_cofs_DSO -component_noinst = -component_install = mca_oob_cofs.la -else -component_noinst = libmca_oob_cofs.la -component_install = -endif - -mcacomponentdir = $(libdir)/openmpi -mcacomponent_LTLIBRARIES = $(component_install) -mca_oob_cofs_la_SOURCES = -mca_oob_cofs_la_LIBADD = src/libmca_oob_cofs.la -mca_oob_cofs_la_LDFLAGS = -module -avoid-version - -noinst_LTLIBRARIES = $(component_noinst) -libmca_oob_cofs_la_SOURCES = -libmca_oob_cofs_la_LIBADD = src/libmca_oob_cofs.la -libmca_oob_cofs_la_LDFLAGS = -module -avoid-version diff --git a/src/mca/oob/cofs/configure.params b/src/mca/oob/cofs/configure.params deleted file mode 100644 index 3a10063536..0000000000 --- a/src/mca/oob/cofs/configure.params +++ /dev/null @@ -1,9 +0,0 @@ -# -*- shell-script -*- -# -# $HEADER$ -# - -# Specific to this module - -PARAM_INIT_FILE=src/oob_cofs.c -PARAM_CONFIG_FILES="Makefile src/Makefile" diff --git a/src/mca/oob/cofs/src/Makefile.am b/src/mca/oob/cofs/src/Makefile.am deleted file mode 100644 index 9b07e91f54..0000000000 --- a/src/mca/oob/cofs/src/Makefile.am +++ /dev/null @@ -1,12 +0,0 @@ -# -# $HEADER$ -# - -include $(top_ompi_srcdir)/config/Makefile.options - -noinst_LTLIBRARIES = libmca_oob_cofs.la -libmca_oob_cofs_la_SOURCES = \ - oob_cofs.h \ - oob_cofs.c \ - oob_cofs_component.c - diff --git a/src/mca/oob/cofs/src/oob_cofs.c b/src/mca/oob/cofs/src/oob_cofs.c deleted file mode 100644 index 6f358a2109..0000000000 --- a/src/mca/oob/cofs/src/oob_cofs.c +++ /dev/null @@ -1,252 +0,0 @@ -/* -*- C -*- - * - * $HEADER$ - * - */ - -#include "ompi_config.h" -#include -#include -#include - -#include "include/types.h" -#include "include/constants.h" -#include "mca/oob/oob.h" -#include "mca/oob/cofs/src/oob_cofs.h" -#include "mca/ns/base/base.h" - -#include -#include -#include -#include -#include - -static int do_recv( - mca_ns_base_jobid_t jobid, - mca_ns_base_vpid_t procid, - struct iovec* iov, - int count, - int* tag, - int flags); - - -/* -* Similiar to unix 
send(2). -* -* @param peer (IN) Opaque name of peer process. -* @param msg (IN) Array of iovecs describing user buffers and lengths. -* @param count (IN) Number of elements in iovec array. -* @param flags (IN) Currently unused. -* @return OMPI error code (<0) on error number of bytes actually sent. -*/ - -int mca_oob_cofs_send( - ompi_process_name_t* peer, - struct iovec *iov, - int count, - int tag, - int flags) -{ - FILE *fp; - int i, wlen; - size_t size = 0; - char msg_file[OMPI_PATH_MAX]; - char msg_file_tmp[OMPI_PATH_MAX]; - - /* create the file and open it... */ - snprintf(msg_file, OMPI_PATH_MAX, "%s/%d_%d_%d_%d_%ld.msg", mca_oob_cofs_comm_loc, - ompi_name_server.get_jobid(&mca_oob_name_self), - ompi_name_server.get_vpid(&mca_oob_name_self), - ompi_name_server.get_vpid(peer), tag, (long)mca_oob_cofs_serial); - snprintf(msg_file_tmp, OMPI_PATH_MAX, "%s/.%d_%d_%d_%d_%ld.msg", mca_oob_cofs_comm_loc, - ompi_name_server.get_jobid(&mca_oob_name_self), - ompi_name_server.get_vpid(&mca_oob_name_self), - ompi_name_server.get_vpid(peer), tag, (long)mca_oob_cofs_serial); - - fp = fopen(msg_file_tmp, "w"); - if (fp == NULL) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - - /* write size */ - for(i=0; id_name[0] == '.') continue; - - ret = sscanf(ent->d_name, "%d_%d_%d_%d_%lu.msg", &tmp_jobid, &tmp_procid, - &tmp_myprocid, &tmp_tag, &tmp_serial); - if (ret != 5) { - continue; - } - - if (tmp_jobid != jobid) { - continue; - } - if (tmp_myprocid != ompi_name_server.get_vpid(&mca_oob_name_self)) { - continue; - } - if (tmp_procid != procid) { - continue; - } - if (tag != MCA_OOB_TAG_ANY && tag != tmp_tag) - continue; - - /* do best one here... 
*/ - found = true; - if (tmp_serial < best_serial) { - strcpy(best_name, ent->d_name); - best_serial = tmp_serial; - if(tagp != NULL) *tagp = tmp_tag; - } - } - - closedir(dir); - if (found) { - return strdup(best_name); - } else { - return NULL; - } -} - - -static int -do_recv(mca_ns_base_jobid_t jobid, mca_ns_base_vpid_t procid, struct iovec* iov, int count, int* tag, int flags) -{ - char *fname; - char full_fname[OMPI_PATH_MAX]; - int fd; - size_t rlen; - size_t size; - - fname = find_match(jobid, procid, tag); - if (fname == NULL) { - return OMPI_ERR_WOULD_BLOCK; - } - snprintf(full_fname, OMPI_PATH_MAX, "%s/%s", mca_oob_cofs_comm_loc, fname); - free(fname); - - fd = open(full_fname, O_RDONLY); - if (fd < 0) { - return OMPI_ERROR; - } - if((flags & MCA_OOB_PEEK) == 0) - unlink(full_fname); - - rlen = read(fd, &size, sizeof(size)); - if (rlen != sizeof(size)) { - close(fd); - return OMPI_ERROR; - } - - if(flags & MCA_OOB_ALLOC) { - if(NULL == iov || 0 == count) { - return OMPI_ERR_BAD_PARAM; - } - iov->iov_base = malloc(size); - if(NULL == iov->iov_base) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - iov->iov_len = size; - count = 1; - } - - if(iov != NULL && count > 0) { - rlen = readv(fd, iov, count); - } else { - rlen = 0; - } - close(fd); - return (flags & MCA_OOB_TRUNC) ? 
size : rlen; -} - diff --git a/src/mca/oob/cofs/src/oob_cofs.h b/src/mca/oob/cofs/src/oob_cofs.h deleted file mode 100644 index cdab433986..0000000000 --- a/src/mca/oob/cofs/src/oob_cofs.h +++ /dev/null @@ -1,127 +0,0 @@ -/* -*- C -*- - * - * $HEADER$ - * - */ -#ifndef OOB_COFS_H -#define OOB_COFS_H -#include "ompi_config.h" - -#include "mca/oob/oob.h" -#include "mca/oob/base/base.h" -#include "include/types.h" - -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif -/* - * Module open / close - */ -int mca_oob_cofs_open(void); -int mca_oob_cofs_close(void); - - -/* - * Startup / Shutdown - */ -mca_oob_t* mca_oob_cofs_init(int* priority, bool *allow_multi_user_threads, bool *have_hidden_threads); -int mca_oob_cofs_module_init(void); -int mca_oob_cofs_module_fini(void); - -/* stubs */ -char* mca_oob_cofs_get_addr(void); -int mca_oob_cofs_set_addr(const ompi_process_name_t*, const char*); -int mca_oob_cofs_ping(const ompi_process_name_t*, const struct timeval* tv); -int mca_oob_cofs_recv_cancel(ompi_process_name_t*, int tag); - - -/** -* Implementation of mca_oob_send(). -* -* @param peer (IN) Opaque name of peer process. -* @param msg (IN) Array of iovecs describing user buffers and lengths. -* @param count (IN) Number of elements in iovec array. -* @param flags (IN) Currently unused. -* @return OMPI error code (<0) on error number of bytes actually sent. -*/ - -int mca_oob_cofs_send( - ompi_process_name_t*, - struct iovec* msg, - int count, - int tag, - int flags); - - -/** -* Implementation of mca_oob_recv(). -* -* @param peer (IN) Opaque name of peer process or OOB_NAME_ANY for wildcard receive. -* @param msg (IN) Array of iovecs describing user buffers and lengths. -* @param types (IN) Parallel array to iovecs describing data type of each iovec element. -* @param count (IN) Number of elements in iovec array. 
-* @param flags (IN) May be OOB_PEEK to return up to the number of bytes provided in the -* iovec array without removing the message from the queue. -* @return OMPI error code (<0) on error or number of bytes actually received. -*/ - -int mca_oob_cofs_recv( - ompi_process_name_t* peer, - struct iovec *msg, - int count, - int* tag, - int flags); - - -/** -* Implementation of mca_oob_send_nb(). -* -* @param peer (IN) Opaque name of peer process. -* @param msg (IN) Array of iovecs describing user buffers and lengths. -* @param count (IN) Number of elements in iovec array. -* @param flags (IN) Currently unused. -* @param cbfunc (IN) Callback function on send completion. -* @param cbdata (IN) User data that is passed to callback function. -* @return OMPI error code (<0) on error number of bytes actually sent. -* -*/ - -int mca_oob_cofs_send_nb( - ompi_process_name_t* peer, - struct iovec* msg, - int count, - int tag, - int flags, - mca_oob_callback_fn_t cbfunc, - void* cbdata); - - -/** -* Implementation of mca_oob_recv_nb(). -* -* @param peer (IN) Opaque name of peer process or OOB_NAME_ANY for wildcard receive. -* @param msg (IN) Array of iovecs describing user buffers and lengths. -* @param count (IN) Number of elements in iovec array. -* @param flags (IN) May be OOB_PEEK to return up to size bytes of msg w/out removing it from the queue, -* @param cbfunc (IN) Callback function on recv completion. -* @param cbdata (IN) User data that is passed to callback function. -* @return OMPI error code (<0) on error or number of bytes actually received. 
-*/ - -int mca_oob_cofs_recv_nb( - ompi_process_name_t* peer, - struct iovec* msg, - int count, - int tag, - int flags, - mca_oob_callback_fn_t cbfunc, - void* cbdata); - -extern char mca_oob_cofs_comm_loc[OMPI_PATH_MAX]; /* location for file drop-off */ -extern uint64_t mca_oob_cofs_serial; - -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif -#endif - diff --git a/src/mca/oob/cofs/src/oob_cofs_component.c b/src/mca/oob/cofs/src/oob_cofs_component.c deleted file mode 100644 index b402f4ccec..0000000000 --- a/src/mca/oob/cofs/src/oob_cofs_component.c +++ /dev/null @@ -1,142 +0,0 @@ -/* -*- C -*- - * - * $HEADER$ - * - */ - -#include "ompi_config.h" - -#include "include/constants.h" -#include "mca/mca.h" -#include "mca/oob/oob.h" -#include "oob_cofs.h" -#include "include/types.h" -#include "util/proc_info.h" -#include "util/output.h" - -#include -#include -#include -#include - - -/* - * Struct of function pointers and all that to let us be initialized - */ -mca_oob_base_component_1_0_0_t mca_oob_cofs_component = { - { - MCA_OOB_BASE_VERSION_1_0_0, - - "cofs", /* MCA module name */ - 1, /* MCA module major version */ - 0, /* MCA module minor version */ - 0, /* MCA module release version */ - NULL, - NULL, - }, - { - false /* checkpoint / restart */ - }, - mca_oob_cofs_init /* module init */ -}; - -mca_oob_t mca_oob_cofs = { - mca_oob_cofs_get_addr, - mca_oob_cofs_set_addr, - mca_oob_cofs_ping, - mca_oob_cofs_send, - mca_oob_cofs_recv, - mca_oob_cofs_send_nb, - mca_oob_cofs_recv_nb, - mca_oob_cofs_recv_cancel, - mca_oob_cofs_module_init, - mca_oob_cofs_module_fini -}; - -char mca_oob_cofs_comm_loc[OMPI_PATH_MAX]; -int mca_oob_cofs_my_jobid; -int mca_oob_cofs_my_procid; -uint64_t mca_oob_cofs_serial; - - -char* mca_oob_cofs_get_addr(void) -{ - return strdup("cofs://"); -} - -int mca_oob_cofs_set_addr(const ompi_process_name_t* name, const char* addr) -{ - return OMPI_SUCCESS; -} - -int mca_oob_cofs_ping(const ompi_process_name_t* name, const struct timeval* 
ts) -{ - return OMPI_SUCCESS; -} - -int mca_oob_cofs_recv_cancel(ompi_process_name_t* name, int tag) -{ - return OMPI_SUCCESS; -} - - - -mca_oob_t* mca_oob_cofs_init(int* priority, bool *allow_multi_user_threads, bool *have_hidden_threads) -{ - int len; - char *tmp; - FILE *fp; - - *priority = 0; - *allow_multi_user_threads = true; - *have_hidden_threads = false; - - /* - * See if we can write in our directory... - */ - if((tmp = getenv("OMPI_MCA_oob_cofs_dir")) != NULL) { - /* user specified in env variable */ - strncpy(mca_oob_cofs_comm_loc, tmp, sizeof(mca_oob_cofs_comm_loc)); - } else if ((tmp = getenv("HOME")) != NULL) { - /* just default to $HOME/cofs */ - snprintf(mca_oob_cofs_comm_loc, sizeof(mca_oob_cofs_comm_loc), - "%s/cofs", tmp); - } else { - ompi_output(0, "mca_oob_cofs_init: invalid/missing " - "OMPI_MCA_oob_cofs_dir\n"); - return NULL; - } - - len = strlen(tmp) + 32; - tmp = malloc(len); - if (tmp == NULL) return NULL; - snprintf(tmp, len, "%s/oob.%d", mca_oob_cofs_comm_loc, mca_oob_cofs_my_procid); - fp = fopen(tmp, "w"); - if (fp == NULL) { - free(tmp); - return NULL; - } - fclose(fp); - unlink(tmp); - free(tmp); - - mca_oob_cofs_serial = 0; - return &mca_oob_cofs; -} - - -int mca_oob_cofs_module_init(void) -{ - if(memcmp(&mca_oob_name_self, &mca_oob_name_any, sizeof(ompi_process_name_t)) == 0) { - mca_oob_name_self.cellid = 0; - mca_oob_name_self.jobid = 1; - mca_oob_name_self.vpid = 0; - } - return OMPI_SUCCESS; -} - -int mca_oob_cofs_module_fini(void) -{ - return OMPI_SUCCESS; -} - diff --git a/src/mca/oob/oob.h b/src/mca/oob/oob.h index 5eb05b535c..0ea85aca97 100644 --- a/src/mca/oob/oob.h +++ b/src/mca/oob/oob.h @@ -13,6 +13,7 @@ #include "include/types.h" #include "mca/mca.h" +#include "mca/gpr/base/base.h" #include "mca/oob/base/base.h" #if defined(c_plusplus) || defined(__cplusplus) @@ -170,6 +171,31 @@ typedef int (*mca_oob_base_module_init_fn_t)(void); */ typedef int (*mca_oob_base_module_fini_fn_t)(void); +/** + * Pack the host's 
contact information into a buffer for use on the registry + * + * @param buffer (IN) Buffer to be used + * @return Nothing + */ +typedef void (*mca_oob_addr_pack_fn_t)(ompi_buffer_t buffer); + +/** + * Callback function for updating the peer address cache + * + * @param + */ +typedef void (*mca_oob_update_callback_fn_t)( + ompi_registry_notify_message_t* msg, + void* cbdata); + + /** + * xcast function for sending common messages to all processes + */ +typedef int (*mca_oob_base_module_xcast_fn_t)(ompi_process_name_t* root, + ompi_list_t* peers, + ompi_buffer_t buffer, + mca_oob_callback_packed_fn_t cbfunc); + /** * OOB Module */ @@ -184,6 +210,9 @@ struct mca_oob_1_0_0_t { mca_oob_base_module_recv_cancel_fn_t oob_recv_cancel; mca_oob_base_module_init_fn_t oob_init; mca_oob_base_module_fini_fn_t oob_fini; + mca_oob_addr_pack_fn_t oob_addr_pack; + mca_oob_update_callback_fn_t oob_update; + mca_oob_base_module_xcast_fn_t oob_xcast; }; /** diff --git a/src/mca/oob/tcp/oob_tcp.c b/src/mca/oob/tcp/oob_tcp.c index 378d421507..a3bda837f6 100644 --- a/src/mca/oob/tcp/oob_tcp.c +++ b/src/mca/oob/tcp/oob_tcp.c @@ -19,9 +19,8 @@ #include "util/output.h" #include "util/if.h" #include "mca/oob/tcp/oob_tcp.h" -#include "mca/ns/ns.h" +#include "mca/ns/base/base.h" #include "mca/gpr/base/base.h" -#include "mca/gpr/gpr.h" #include "mca/pcmclient/pcmclient.h" #include "mca/pcmclient/base/base.h" @@ -111,6 +110,8 @@ static mca_oob_t mca_oob_tcp = { mca_oob_tcp_recv_cancel, mca_oob_tcp_init, mca_oob_tcp_fini, + mca_oob_tcp_addr_pack, + mca_oob_tcp_registry_callback }; @@ -363,7 +364,7 @@ static void mca_oob_tcp_recv_handler(int sd, short flags, void* user) /* check for wildcard name - if this is true - we allocate a name from the name server * and return to the peer */ - if(mca_oob_tcp_process_name_compare(guid, MCA_OOB_NAME_ANY) == 0) { + if(ompi_name_server.compare(OMPI_NS_CMP_ALL, guid, MCA_OOB_NAME_ANY) == 0) { guid->jobid = ompi_name_server.create_jobid(); guid->vpid = 
ompi_name_server.reserve_range(guid->jobid,1); ompi_name_server.assign_cellid_to_process(guid); @@ -409,8 +410,10 @@ mca_oob_t* mca_oob_tcp_component_init(int* priority, bool *allow_multi_user_thre return NULL; /* initialize data structures */ - ompi_rb_tree_init(&mca_oob_tcp_component.tcp_peer_tree, (ompi_rb_tree_comp_fn_t)mca_oob_tcp_process_name_compare); - ompi_rb_tree_init(&mca_oob_tcp_component.tcp_peer_names, (ompi_rb_tree_comp_fn_t)mca_oob_tcp_process_name_compare); + ompi_rb_tree_init(&mca_oob_tcp_component.tcp_peer_tree, + (ompi_rb_tree_comp_fn_t)mca_oob_tcp_process_name_compare); + ompi_rb_tree_init(&mca_oob_tcp_component.tcp_peer_names, + (ompi_rb_tree_comp_fn_t)mca_oob_tcp_process_name_compare); ompi_free_list_init(&mca_oob_tcp_component.tcp_peer_free, sizeof(mca_oob_tcp_peer_t), @@ -445,7 +448,7 @@ mca_oob_t* mca_oob_tcp_component_init(int* priority, bool *allow_multi_user_thre * Callback from registry on change to subscribed segments. */ -static void mca_oob_tcp_registry_callback( +void mca_oob_tcp_registry_callback( ompi_registry_notify_message_t* msg, void* cbdata) { @@ -511,7 +514,7 @@ int mca_oob_tcp_resolve(mca_oob_tcp_peer_t* peer) mca_oob_tcp_addr_t* addr; mca_oob_tcp_subscription_t* subscription; ompi_list_item_t* item; - char segment[32], *jobid; + char *segment, *jobid; int rc; /* if the address is already cached - simply return it */ @@ -544,11 +547,13 @@ int mca_oob_tcp_resolve(mca_oob_tcp_peer_t* peer) /* subscribe */ jobid = ompi_name_server.get_jobid_string(&peer->peer_name); - sprintf(segment, "oob-tcp-%s", jobid); + asprintf(&segment, "%s-%s", OMPI_RTE_OOB_SEGMENT, jobid); rc = ompi_registry.subscribe( OMPI_REGISTRY_OR, OMPI_REGISTRY_NOTIFY_ADD_ENTRY|OMPI_REGISTRY_NOTIFY_DELETE_ENTRY| - OMPI_REGISTRY_NOTIFY_MODIFICATION|OMPI_REGISTRY_NOTIFY_PRE_EXISTING, + OMPI_REGISTRY_NOTIFY_MODIFICATION| + OMPI_REGISTRY_NOTIFY_ON_STARTUP|OMPI_REGISTRY_NOTIFY_INCLUDE_STARTUP_DATA| + OMPI_REGISTRY_NOTIFY_ON_SHUTDOWN, segment, NULL, 
mca_oob_tcp_registry_callback, @@ -570,13 +575,14 @@ int mca_oob_tcp_init(void) char *keys[2], *jobid; void *addr; int32_t size; - char segment[32]; + char *segment; ompi_buffer_t buffer; ompi_process_name_t* peers; mca_oob_tcp_subscription_t* subscription; size_t npeers; int rc; ompi_list_item_t* item; + ompi_registry_notify_id_t rc_tag; /* iterate through the open connections and send an ident message to all peers - * note that we initially come up w/out knowing our process name - and are assigned @@ -598,31 +604,31 @@ int mca_oob_tcp_init(void) } jobid = ompi_name_server.get_jobid_string(&mca_oob_name_self); - sprintf(segment, "oob-tcp-%s", jobid); + asprintf(&segment, "%s-%s", OMPI_RTE_OOB_SEGMENT, jobid); if(mca_oob_tcp_component.tcp_debug > 1) { - ompi_output(0, "[%d,%d,%d] mca_oob_tcp_init: calling ompi_registry.synchro(%s,%d)\n", + ompi_output(0, "[%d,%d,%d] mca_oob_tcp_init: calling ompi_registry.subscribe(%s,%d)\n", OMPI_NAME_ARGS(mca_oob_name_self), segment, npeers); } - /* register synchro callback to receive notification when all processes have registered */ + /* register subscribe callback to receive notification when all processes have registered */ subscription = OBJ_NEW(mca_oob_tcp_subscription_t); subscription->jobid = mca_oob_name_self.jobid; ompi_list_append(&mca_oob_tcp_component.tcp_subscriptions, &subscription->item); OMPI_THREAD_UNLOCK(&mca_oob_tcp_component.tcp_lock); - rc = ompi_registry.synchro( + rc_tag = ompi_registry.subscribe( OMPI_REGISTRY_OR, - OMPI_REGISTRY_SYNCHRO_MODE_ASCENDING|OMPI_REGISTRY_SYNCHRO_MODE_ONE_SHOT, + OMPI_REGISTRY_NOTIFY_ON_STARTUP|OMPI_REGISTRY_NOTIFY_INCLUDE_STARTUP_DATA| + OMPI_REGISTRY_NOTIFY_ON_SHUTDOWN, segment, NULL, - npeers, mca_oob_tcp_registry_callback, NULL); - if(rc != OMPI_SUCCESS) { - ompi_output(0, "mca_oob_tcp_init: registry synchro failed with error code %d.", rc); - return rc; + if(rc_tag == OMPI_REGISTRY_NOTIFY_ID_MAX) { + ompi_output(0, "mca_oob_tcp_init: registry subscription failed"); + 
return OMPI_ERROR; } /* put our contact info in registry */ @@ -705,19 +711,7 @@ int mca_oob_tcp_fini(void) int mca_oob_tcp_process_name_compare(const ompi_process_name_t* n1, const ompi_process_name_t* n2) { - if(n1->cellid < n2->cellid) - return -1; - else if(n1->cellid > n2->cellid) - return 1; - else if(n1->jobid < n2->jobid) - return -1; - else if(n1->jobid > n2->jobid) - return 1; - else if(n1->vpid < n2->vpid) - return -1; - else if(n1->vpid > n2->vpid) - return 1; - return(0); + return mca_ns_base_compare(OMPI_NS_CMP_ALL, n1, n2); } diff --git a/src/mca/oob/tcp/oob_tcp.h b/src/mca/oob/tcp/oob_tcp.h index 5361cff8a0..db0e29e27a 100644 --- a/src/mca/oob/tcp/oob_tcp.h +++ b/src/mca/oob/tcp/oob_tcp.h @@ -80,8 +80,7 @@ int mca_oob_tcp_fini(void); * when process names are used as indices. */ int mca_oob_tcp_process_name_compare(const ompi_process_name_t* n1, const ompi_process_name_t* n2); - -/** + /** * Obtain contact information for this host (e.g. :) */ @@ -219,6 +218,13 @@ int mca_oob_tcp_parse_uri( struct sockaddr_in* inaddr ); +/** + * Callback from registry on change to subscribed segments + */ +void mca_oob_tcp_registry_callback( + ompi_registry_notify_message_t* msg, + void* cbdata); + /** * OOB TCP Component diff --git a/src/mca/oob/tcp/oob_tcp_hdr.h b/src/mca/oob/tcp/oob_tcp_hdr.h index 09f668f208..906bed92f7 100644 --- a/src/mca/oob/tcp/oob_tcp_hdr.h +++ b/src/mca/oob/tcp/oob_tcp_hdr.h @@ -31,9 +31,9 @@ typedef struct mca_oob_tcp_hdr_t mca_oob_tcp_hdr_t; #define MCA_OOB_TCP_HDR_NTOH(h) \ OMPI_PROCESS_NAME_NTOH((h)->msg_src); \ OMPI_PROCESS_NAME_NTOH((h)->msg_dst); \ - ntohl((h)->msg_type); \ - ntohl((h)->msg_size); \ - ntohl((h)->msg_tag); + (h)->msg_type = ntohl((h)->msg_type); \ + (h)->msg_size = ntohl((h)->msg_size); \ + (h)->msg_tag = ntohl((h)->msg_tag); /** * Convert the message header to network byte order @@ -41,9 +41,9 @@ typedef struct mca_oob_tcp_hdr_t mca_oob_tcp_hdr_t; #define MCA_OOB_TCP_HDR_HTON(h) \ 
OMPI_PROCESS_NAME_HTON((h)->msg_src); \ OMPI_PROCESS_NAME_HTON((h)->msg_dst); \ - htonl((h)->msg_type); \ - htonl((h)->msg_size); \ - htonl((h)->msg_tag); + (h)->msg_type = htonl((h)->msg_type); \ + (h)->msg_size = htonl((h)->msg_size); \ + (h)->msg_tag = htonl((h)->msg_tag); #endif /* _MCA_OOB_TCP_MESSAGE_H_ */ diff --git a/src/mca/oob/tcp/oob_tcp_msg.c b/src/mca/oob/tcp/oob_tcp_msg.c index e04c06aa86..6f258bcda0 100644 --- a/src/mca/oob/tcp/oob_tcp_msg.c +++ b/src/mca/oob/tcp/oob_tcp_msg.c @@ -106,8 +106,8 @@ int mca_oob_tcp_msg_timedwait(mca_oob_tcp_msg_t* msg, int* rc, struct timespec* #else /* wait for message to complete */ while(msg->msg_complete == false && - (tv.tv_sec <= secs || - (tv.tv_sec == secs && tv.tv_usec < usecs))) { + ((uint32_t)tv.tv_sec <= secs || + ((uint32_t)tv.tv_sec == secs && (uint32_t)tv.tv_usec < usecs))) { ompi_event_loop(OMPI_EVLOOP_ONCE); gettimeofday(&tv,NULL); } @@ -172,7 +172,7 @@ bool mca_oob_tcp_msg_send_handler(mca_oob_tcp_msg_t* msg, struct mca_oob_tcp_pee msg->msg_rc += rc; do {/* while there is still more iovecs to write */ - if(rc < msg->msg_rwptr->iov_len) { + if(rc < (int)msg->msg_rwptr->iov_len) { msg->msg_rwptr->iov_len -= rc; msg->msg_rwptr->iov_base = (void *) ((char *) msg->msg_rwptr->iov_base + rc); break; @@ -270,7 +270,7 @@ static bool mca_oob_tcp_msg_recv(mca_oob_tcp_msg_t* msg, mca_oob_tcp_peer_t* pee } do { - if(rc < msg->msg_rwptr->iov_len) { + if(rc < (int)msg->msg_rwptr->iov_len) { msg->msg_rwptr->iov_len -= rc; msg->msg_rwptr->iov_base = (void *) ((char *) msg->msg_rwptr->iov_base + rc); break; @@ -318,7 +318,7 @@ static void mca_oob_tcp_msg_ident(mca_oob_tcp_msg_t* msg, mca_oob_tcp_peer_t* pe { ompi_process_name_t src = msg->msg_hdr.msg_src; OMPI_THREAD_LOCK(&mca_oob_tcp_component.tcp_lock); - if(mca_oob_tcp_process_name_compare(&peer->peer_name, &src) != 0) { + if(ompi_name_server.compare(OMPI_NS_CMP_ALL, &peer->peer_name, &src) != 0) { ompi_rb_tree_delete(&mca_oob_tcp_component.tcp_peer_tree, 
&peer->peer_name); peer->peer_name = src; ompi_rb_tree_insert(&mca_oob_tcp_component.tcp_peer_tree, &peer->peer_name, peer); @@ -362,7 +362,7 @@ static void mca_oob_tcp_msg_data(mca_oob_tcp_msg_t* msg, mca_oob_tcp_peer_t* pee */ post->msg_uiov[0].iov_base = msg->msg_rwbuf; post->msg_uiov[0].iov_len = msg->msg_hdr.msg_size; - post->msg_rc = msg->msg_hdr.msg_size; + post->msg_rc = msg->msg_hdr.msg_size; msg->msg_rwbuf = NULL; } @@ -458,8 +458,8 @@ mca_oob_tcp_msg_t* mca_oob_tcp_msg_match_recv(ompi_process_name_t* name, int tag msg != (mca_oob_tcp_msg_t*) ompi_list_get_end(&mca_oob_tcp_component.tcp_msg_recv); msg = (mca_oob_tcp_msg_t*) ompi_list_get_next(msg)) { - if((0 == mca_oob_tcp_process_name_compare(name,MCA_OOB_NAME_ANY) || - (0 == mca_oob_tcp_process_name_compare(name, &msg->msg_peer)))) { + if((0 == ompi_name_server.compare(OMPI_NS_CMP_ALL, name,MCA_OOB_NAME_ANY) || + (0 == ompi_name_server.compare(OMPI_NS_CMP_ALL, name, &msg->msg_peer)))) { if (tag == MCA_OOB_TAG_ANY || tag == msg->msg_hdr.msg_tag) { return msg; } @@ -484,8 +484,8 @@ mca_oob_tcp_msg_t* mca_oob_tcp_msg_match_post(ompi_process_name_t* name, int tag msg != (mca_oob_tcp_msg_t*) ompi_list_get_end(&mca_oob_tcp_component.tcp_msg_post); msg = (mca_oob_tcp_msg_t*) ompi_list_get_next(msg)) { - if((0 == mca_oob_tcp_process_name_compare(&msg->msg_peer,MCA_OOB_NAME_ANY) || - (0 == mca_oob_tcp_process_name_compare(&msg->msg_peer,name)))) { + if((0 == ompi_name_server.compare(OMPI_NS_CMP_ALL, &msg->msg_peer,MCA_OOB_NAME_ANY) || + (0 == ompi_name_server.compare(OMPI_NS_CMP_ALL, &msg->msg_peer,name)))) { if (msg->msg_hdr.msg_tag == MCA_OOB_TAG_ANY || msg->msg_hdr.msg_tag == tag) { if((msg->msg_flags & MCA_OOB_PEEK) == 0 || peek) { ompi_list_remove_item(&mca_oob_tcp_component.tcp_msg_post, &msg->super); diff --git a/src/mca/oob/tcp/oob_tcp_peer.c b/src/mca/oob/tcp/oob_tcp_peer.c index e9bc7c1a34..c7592b14fe 100644 --- a/src/mca/oob/tcp/oob_tcp_peer.c +++ b/src/mca/oob/tcp/oob_tcp_peer.c @@ -211,7 +211,7 @@ 
mca_oob_tcp_peer_t * mca_oob_tcp_peer_lookup(const ompi_process_name_t* name) /* if the peer list is over the maximum size, remove one unsed peer */ ompi_list_prepend(&mca_oob_tcp_component.tcp_peer_list, (ompi_list_item_t *) peer); if(mca_oob_tcp_component.tcp_peer_limit > 0 && - ompi_list_get_size(&mca_oob_tcp_component.tcp_peer_list) > mca_oob_tcp_component.tcp_peer_limit) { + (int)ompi_list_get_size(&mca_oob_tcp_component.tcp_peer_list) > mca_oob_tcp_component.tcp_peer_limit) { old = (mca_oob_tcp_peer_t *) ompi_list_get_last(&mca_oob_tcp_component.tcp_peer_list); while(1) { @@ -491,7 +491,7 @@ static int mca_oob_tcp_peer_recv_connect_ack(mca_oob_tcp_peer_t* peer) } /* if we have a wildcard name - use the name returned by the peer */ - if(mca_oob_tcp_process_name_compare(&mca_oob_name_self, &mca_oob_name_any) == 0) { + if(ompi_name_server.compare(OMPI_NS_CMP_ALL, &mca_oob_name_self, &mca_oob_name_any) == 0) { mca_oob_name_self = guid[1]; } @@ -637,7 +637,7 @@ static void mca_oob_tcp_peer_recv_handler(int sd, short flags, void* user) msg->msg_rwiov->iov_len = 1; msg->msg_rwcnt = msg->msg_rwnum = 1; msg->msg_rwptr = msg->msg_rwiov; - msg->msg_rwiov[0].iov_base = &msg->msg_hdr; + msg->msg_rwiov[0].iov_base = (void*)&msg->msg_hdr; msg->msg_rwiov[0].iov_len = sizeof(msg->msg_hdr); peer->peer_recv_msg = msg; } @@ -777,7 +777,7 @@ bool mca_oob_tcp_peer_accept(mca_oob_tcp_peer_t* peer, int sd) if ((peer->peer_state == MCA_OOB_TCP_CLOSED) || (peer->peer_state == MCA_OOB_TCP_RESOLVE) || (peer->peer_state != MCA_OOB_TCP_CONNECTED && - mca_oob_tcp_process_name_compare(&peer->peer_name, MCA_OOB_NAME_SELF) < 0)) { + ompi_name_server.compare(OMPI_NS_CMP_ALL, &peer->peer_name, MCA_OOB_NAME_SELF) < 0)) { if(peer->peer_state != MCA_OOB_TCP_CLOSED) { mca_oob_tcp_peer_close(peer); diff --git a/src/mca/oob/tcp/oob_tcp_ping.c b/src/mca/oob/tcp/oob_tcp_ping.c index 2019d08fd9..708ed6510f 100644 --- a/src/mca/oob/tcp/oob_tcp_ping.c +++ b/src/mca/oob/tcp/oob_tcp_ping.c @@ -50,7 +50,7 @@ 
int mca_oob_tcp_ping( msg->msg_uiov = NULL; msg->msg_ucnt = 0; msg->msg_rwiov = mca_oob_tcp_msg_iov_alloc(msg, 1); - msg->msg_rwiov[0].iov_base = &msg->msg_hdr; + msg->msg_rwiov[0].iov_base = (void*)&msg->msg_hdr; msg->msg_rwiov[0].iov_len = sizeof(msg->msg_hdr); msg->msg_rwptr = msg->msg_rwiov; msg->msg_rwcnt = msg->msg_rwnum = 1; diff --git a/src/mca/oob/tcp/oob_tcp_recv.c b/src/mca/oob/tcp/oob_tcp_recv.c index 146f6b2c65..5fe83c98eb 100644 --- a/src/mca/oob/tcp/oob_tcp_recv.c +++ b/src/mca/oob/tcp/oob_tcp_recv.c @@ -258,8 +258,8 @@ int mca_oob_tcp_recv_cancel( mca_oob_tcp_msg_t* msg = (mca_oob_tcp_msg_t*)item; next = ompi_list_get_next(item); - if((0 == mca_oob_tcp_process_name_compare(name,MCA_OOB_NAME_ANY) || - (0 == mca_oob_tcp_process_name_compare(&msg->msg_peer,name)))) { + if((0 == ompi_name_server.compare(OMPI_NS_CMP_ALL, name,MCA_OOB_NAME_ANY) || + (0 == ompi_name_server.compare(OMPI_NS_CMP_ALL, &msg->msg_peer,name)))) { if (tag == MCA_OOB_TAG_ANY || msg->msg_hdr.msg_tag == tag) { ompi_list_remove_item(&mca_oob_tcp_component.tcp_msg_post, &msg->super); MCA_OOB_TCP_MSG_RETURN(msg); diff --git a/src/mca/oob/tcp/oob_tcp_send.c b/src/mca/oob/tcp/oob_tcp_send.c index cead0c7b76..cbfc828e81 100644 --- a/src/mca/oob/tcp/oob_tcp_send.c +++ b/src/mca/oob/tcp/oob_tcp_send.c @@ -60,7 +60,7 @@ int mca_oob_tcp_send( msg->msg_uiov = iov; msg->msg_ucnt = count; msg->msg_rwiov = mca_oob_tcp_msg_iov_alloc(msg, count+1); - msg->msg_rwiov[0].iov_base = &msg->msg_hdr; + msg->msg_rwiov[0].iov_base = (void*)&msg->msg_hdr; msg->msg_rwiov[0].iov_len = sizeof(msg->msg_hdr); msg->msg_rwptr = msg->msg_rwiov; msg->msg_rwcnt = msg->msg_rwnum = count + 1; @@ -139,7 +139,7 @@ int mca_oob_tcp_send_nb( msg->msg_uiov = iov; msg->msg_ucnt = count; msg->msg_rwiov = mca_oob_tcp_msg_iov_alloc(msg,count+1); - msg->msg_rwiov[0].iov_base = &msg->msg_hdr; + msg->msg_rwiov[0].iov_base = (void*)&msg->msg_hdr; msg->msg_rwiov[0].iov_len = sizeof(msg->msg_hdr); msg->msg_rwptr = msg->msg_rwiov; 
msg->msg_rwcnt = msg->msg_rwnum = count + 1; diff --git a/src/mca/pcm/base/pcm_base_kill_track.c b/src/mca/pcm/base/pcm_base_kill_track.c index c3435ff037..f7e26a8a8f 100644 --- a/src/mca/pcm/base/pcm_base_kill_track.c +++ b/src/mca/pcm/base/pcm_base_kill_track.c @@ -274,7 +274,7 @@ mca_pcm_base_kill_send_job_msg(mca_ns_base_jobid_t jobid, /* * Get the contact data */ - keys[0] = ns_base_convert_jobid_to_string(jobid); + keys[0] = mca_ns_base_convert_jobid_to_string(jobid); keys[1] = NULL; snprintf(segment, 256, KILLJOB_SEGMENT_STRING); @@ -306,12 +306,12 @@ mca_pcm_base_kill_send_job_msg(mca_ns_base_jobid_t jobid, if (ret != OMPI_SUCCESS) { printf("ompi_unpack returned %d\n", ret); } - printf("lower: %s\n", ns_base_get_proc_name_string(&proc_name)); + printf("lower: %s\n", mca_ns_base_get_proc_name_string(&proc_name)); ompi_unpack(buf, &proc_name, 1, OMPI_NAME); - printf("upper: %s\n", ns_base_get_proc_name_string(&proc_name)); + printf("upper: %s\n", mca_ns_base_get_proc_name_string(&proc_name)); /* get the contact name */ ompi_unpack(buf, &proc_name, 1, OMPI_NAME); - printf("contact: %s\n", ns_base_get_proc_name_string(&proc_name)); + printf("contact: %s\n", mca_ns_base_get_proc_name_string(&proc_name)); /* free the buffer and start over for packing */ @@ -377,8 +377,8 @@ mca_pcm_base_kill_register(mca_pcm_base_module_t* pcm, ompi_pack(buf, ompi_rte_get_self(), 1, OMPI_NAME); /* fill out the keys */ - keys[0] = ns_base_get_jobid_string(&low); - keys[1] = ns_base_get_vpid_string(&low); + keys[0] = mca_ns_base_get_jobid_string(&low); + keys[1] = mca_ns_base_get_vpid_string(&low); keys[2] = NULL; snprintf(segment, 256, KILLJOB_SEGMENT_STRING); diff --git a/src/mca/pcm/rsh/pcm_rsh_kill.c b/src/mca/pcm/rsh/pcm_rsh_kill.c index 68da34e46e..201a9a07b9 100644 --- a/src/mca/pcm/rsh/pcm_rsh_kill.c +++ b/src/mca/pcm/rsh/pcm_rsh_kill.c @@ -26,8 +26,8 @@ mca_pcm_rsh_kill_proc(struct mca_pcm_base_module_1_0_0_t* me_super, if (0 != (OMPI_RTE_SPAWN_HIGH_QOS & me->constraints)) 
{ pid = mca_pcm_base_job_list_get_starter(me->jobs, - ns_base_get_jobid(name), - ns_base_get_vpid(name), + mca_ns_base_get_jobid(name), + mca_ns_base_get_vpid(name), false); if (pid <= 0) return errno; diff --git a/src/mca/pcm/rsh/pcm_rsh_spawn.c b/src/mca/pcm/rsh/pcm_rsh_spawn.c index ef0123cc3b..06f5907af4 100644 --- a/src/mca/pcm/rsh/pcm_rsh_spawn.c +++ b/src/mca/pcm/rsh/pcm_rsh_spawn.c @@ -33,6 +33,7 @@ #include "util/argv.h" #include "util/numtostr.h" #include "mca/ns/base/base.h" +#include "mca/gpr/base/base.h" #include "util/proc_info.h" #include "util/show_help.h" #include "util/if.h" @@ -494,8 +495,8 @@ internal_wait_cb(pid_t pid, int status, void *data) mca_ns_base_vpid_t lower = 0; mca_ns_base_vpid_t i = 0; int ret; - char *proc_name; mca_pcm_rsh_module_t *me = (mca_pcm_rsh_module_t*) data; + ompi_rte_process_status_t proc_status; ompi_output_verbose(10, mca_pcm_base_output, "process %d exited with status %d", pid, status); @@ -509,11 +510,10 @@ internal_wait_cb(pid_t pid, int status, void *data) } /* unregister all the procs */ + proc_status.status_key = OMPI_PROC_KILLED; + proc_status.exit_code = (ompi_exit_code_t)status; for (i = lower ; i <= upper ; ++i) { - proc_name = - ns_base_get_proc_name_string( - ns_base_create_process_name(0, jobid, i)); - ompi_registry.rte_unregister(proc_name); + ompi_rte_set_process_status(&proc_status, mca_ns_base_create_process_name(0, jobid, i)); } mca_pcm_base_kill_unregister(me, jobid, lower, upper); diff --git a/src/mca/svc/sched/svc_sched.c b/src/mca/svc/sched/svc_sched.c index cc8a8e471f..c8f7c3bb1e 100644 --- a/src/mca/svc/sched/svc_sched.c +++ b/src/mca/svc/sched/svc_sched.c @@ -142,6 +142,8 @@ int mca_svc_sched_module_init(mca_svc_base_module_t* module) { /* register */ int rc; + ompi_registry_notify_id_t rc_tag; + if(mca_svc_sched_component.sched_debug > 0) { ompi_output(0, "[%d,%d,%d] mca_svc_sched_module_init: calling ompi_registry.subscribe(\"vm\")"); } @@ -149,20 +151,21 @@ int 
mca_svc_sched_module_init(mca_svc_base_module_t* module) mca_svc_sched_component.sched_node_next = (mca_svc_sched_node_t*)ompi_list_get_end(&mca_svc_sched_component.sched_node_list); - rc = ompi_registry.subscribe( - OMPI_REGISTRY_NONE, - OMPI_REGISTRY_NOTIFY_MODIFICATION| - OMPI_REGISTRY_NOTIFY_ADD_ENTRY| - OMPI_REGISTRY_NOTIFY_DELETE_ENTRY| - OMPI_REGISTRY_NOTIFY_PRE_EXISTING, - "ompi-vm", /* segment */ - NULL, /* keys */ - mca_svc_sched_registry_callback, - NULL); - if(rc != OMPI_SUCCESS) { - ompi_output(0, "[%d,%d,%d] mca_svc_sched_module_init: ompi_registry.subscribe failed, error=%d\n", - OMPI_NAME_ARGS(mca_oob_name_self), rc); - return rc; + rc_tag = ompi_registry.subscribe( + OMPI_REGISTRY_NONE, + OMPI_REGISTRY_NOTIFY_MODIFICATION| + OMPI_REGISTRY_NOTIFY_ADD_ENTRY| + OMPI_REGISTRY_NOTIFY_DELETE_ENTRY| + OMPI_REGISTRY_NOTIFY_PRE_EXISTING | + OMPI_REGISTRY_NOTIFY_ON_STARTUP, + OMPI_RTE_VM_STATUS_SEGMENT, /* segment */ + NULL, /* keys */ + mca_svc_sched_registry_callback, + NULL); + if(rc_tag -= OMPI_REGISTRY_NOTIFY_ID_MAX) { + ompi_output(0, "[%d,%d,%d] mca_svc_sched_module_init: ompi_registry.subscribe failed", + OMPI_NAME_ARGS(mca_oob_name_self)); + return OMPI_ERROR; } rc = mca_oob_recv_packed_nb( diff --git a/src/mpi/runtime/ompi_mpi_finalize.c b/src/mpi/runtime/ompi_mpi_finalize.c index 00040376d9..714be11230 100644 --- a/src/mpi/runtime/ompi_mpi_finalize.c +++ b/src/mpi/runtime/ompi_mpi_finalize.c @@ -16,7 +16,6 @@ #include "op/op.h" #include "file/file.h" #include "info/info.h" -#include "util/proc_info.h" #include "runtime/runtime.h" #include "runtime/ompi_progress.h" #include "runtime/ompi_rte_wait.h" @@ -36,145 +35,156 @@ #include "mca/io/base/base.h" #include "mca/oob/base/base.h" #include "mca/ns/base/base.h" +#include "mca/gpr/base/base.h" int ompi_mpi_finalize(void) { - int ret; + int ret; + ompi_rte_process_status_t my_status; - ompi_mpi_finalized = true; + ompi_mpi_finalized = true; #if OMPI_HAVE_THREADS == 0 - 
ompi_progress_events(OMPI_EVLOOP_ONCE); + ompi_progress_events(OMPI_EVLOOP_ONCE); #endif - /* unregister process */ - if (OMPI_SUCCESS != (ret = ompi_registry.rte_unregister( - ns_base_get_proc_name_string(ompi_rte_get_self())))) { - return ret; - } + /* begin recording compound command */ + ompi_registry.begin_compound_cmd(); - /* wait for all processes to reach same state */ - if (OMPI_SUCCESS != (ret = ompi_rte_monitor_procs_unregistered())) { - if (ompi_rte_debug_flag) { - ompi_output(0, "mpi_finalize: gave up waiting for other processes to complete"); - } - } + /* Set process status to "terminating"*/ + my_status.status_key = OMPI_PROC_TERMINATING; + my_status.exit_code = 0; + if (OMPI_SUCCESS != (ret = ompi_rte_set_process_status(&my_status, ompi_rte_get_self()))) { + return ret; + } - /* shutdown communications */ - if (OMPI_SUCCESS != (ret = mca_ptl_base_close())) { - return ret; - } - if (OMPI_SUCCESS != (ret = mca_pml_base_close())) { - return ret; - } + /* execute the compound command - no return data requested + * we'll get it through the shutdown message + */ + ompi_registry.exec_compound_cmd(OMPI_REGISTRY_NO_RETURN_REQUESTED); - /* Shut down any bindings-specific issues: C++, F77, F90 (may or - may not be necessary...?) */ + /* wait for all processes to reach same state */ + if (OMPI_SUCCESS != (ret = ompi_rte_wait_shutdown_msg())) { + if (ompi_rte_debug_flag) { + ompi_output(0, "mpi_finalize: gave up waiting for other processes to complete"); + } + } - /* Free communication objects */ + /* shutdown communications */ + if (OMPI_SUCCESS != (ret = mca_ptl_base_close())) { + return ret; + } + if (OMPI_SUCCESS != (ret = mca_pml_base_close())) { + return ret; + } - /* free window resources */ + /* Shut down any bindings-specific issues: C++, F77, F90 (may or + may not be necessary...?) 
*/ - /* free file resources */ - if (OMPI_SUCCESS != (ret = ompi_file_finalize())) { - return ret; - } + /* Free communication objects */ - /* free communicator resources */ - if (OMPI_SUCCESS != (ret = ompi_comm_finalize())) { - return ret; - } + /* free window resources */ - /* free requests */ - if (OMPI_SUCCESS != (ret = ompi_request_finalize())) { - return ret; - } + /* free file resources */ + if (OMPI_SUCCESS != (ret = ompi_file_finalize())) { + return ret; + } - /* Free secondary resources */ + /* free communicator resources */ + if (OMPI_SUCCESS != (ret = ompi_comm_finalize())) { + return ret; + } - /* free attr resources */ - if (OMPI_SUCCESS != (ret = ompi_attr_finalize())) { - return ret; - } + /* free requests */ + if (OMPI_SUCCESS != (ret = ompi_request_finalize())) { + return ret; + } - /* free group resources */ - if (OMPI_SUCCESS != (ret = ompi_group_finalize())) { - return ret; - } + /* Free secondary resources */ - /* free internal error resources */ - if (OMPI_SUCCESS != (ret = ompi_errcode_intern_finalize())) { - return ret; - } + /* free attr resources */ + if (OMPI_SUCCESS != (ret = ompi_attr_finalize())) { + return ret; + } + + /* free group resources */ + if (OMPI_SUCCESS != (ret = ompi_group_finalize())) { + return ret; + } + + /* free internal error resources */ + if (OMPI_SUCCESS != (ret = ompi_errcode_intern_finalize())) { + return ret; + } - /* free error class resources */ - if (OMPI_SUCCESS != (ret = ompi_errclass_finalize())) { - return ret; - } + /* free error class resources */ + if (OMPI_SUCCESS != (ret = ompi_errclass_finalize())) { + return ret; + } - /* free error code resources */ - if (OMPI_SUCCESS != (ret = ompi_mpi_errcode_finalize())) { - return ret; - } + /* free error code resources */ + if (OMPI_SUCCESS != (ret = ompi_mpi_errcode_finalize())) { + return ret; + } - /* free errhandler resources */ - if (OMPI_SUCCESS != (ret = ompi_errhandler_finalize())) { - return ret; - } + /* free errhandler resources */ + if 
(OMPI_SUCCESS != (ret = ompi_errhandler_finalize())) { + return ret; + } - /* Free all other resources */ + /* Free all other resources */ - /* free op resources */ - if (OMPI_SUCCESS != (ret = ompi_op_finalize())) { - return ret; - } + /* free op resources */ + if (OMPI_SUCCESS != (ret = ompi_op_finalize())) { + return ret; + } - /* free ddt resources */ - if (OMPI_SUCCESS != (ret = ompi_ddt_finalize())) { - return ret; - } + /* free ddt resources */ + if (OMPI_SUCCESS != (ret = ompi_ddt_finalize())) { + return ret; + } - /* free info resources */ - if (OMPI_SUCCESS != (ret = ompi_info_finalize())) { - return ret; - } + /* free info resources */ + if (OMPI_SUCCESS != (ret = ompi_info_finalize())) { + return ret; + } - /* free module exchange resources */ - if (OMPI_SUCCESS != (ret = mca_base_modex_finalize())) { - return ret; - } + /* free module exchange resources */ + if (OMPI_SUCCESS != (ret = mca_base_modex_finalize())) { + return ret; + } - /* Close down MCA modules */ + /* Close down MCA modules */ - if (OMPI_SUCCESS != (ret = mca_io_base_close())) { - return ret; - } - if (OMPI_SUCCESS != (ret = mca_topo_base_close())) { - return ret; - } - if (OMPI_SUCCESS != (ret = mca_coll_base_close())) { - return ret; - } + if (OMPI_SUCCESS != (ret = mca_io_base_close())) { + return ret; + } + if (OMPI_SUCCESS != (ret = mca_topo_base_close())) { + return ret; + } + if (OMPI_SUCCESS != (ret = mca_coll_base_close())) { + return ret; + } - /* Leave the RTE */ + /* Leave the RTE */ - if (OMPI_SUCCESS != (ret = ompi_rte_finalize())) { - return ret; - } + if (OMPI_SUCCESS != (ret = ompi_rte_finalize())) { + return ret; + } - /* Close down the MCA */ + /* Close down the MCA */ - if (OMPI_SUCCESS != (ret = mca_base_close())) { - return ret; - } + if (OMPI_SUCCESS != (ret = mca_base_close())) { + return ret; + } - /* Leave OMPI land */ + /* Leave OMPI land */ - if (OMPI_SUCCESS != (ret = ompi_finalize())) { - return ret; - } + if (OMPI_SUCCESS != (ret = ompi_finalize())) { + 
return ret; + } - /* All done */ + /* All done */ - return MPI_SUCCESS; + return MPI_SUCCESS; } diff --git a/src/mpi/runtime/ompi_mpi_init.c b/src/mpi/runtime/ompi_mpi_init.c index 335d1a6a78..4e55074584 100644 --- a/src/mpi/runtime/ompi_mpi_init.c +++ b/src/mpi/runtime/ompi_mpi_init.c @@ -2,6 +2,8 @@ * $HEADER$ */ +/** @file **/ + #include "ompi_config.h" #include "include/constants.h" @@ -40,6 +42,7 @@ #include "mca/topo/base/base.h" #include "mca/io/io.h" #include "mca/io/base/base.h" +#include "mca/oob/oob.h" #include "mca/oob/base/base.h" #include "mca/ns/base/base.h" #include "mca/gpr/base/base.h" @@ -67,9 +70,9 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) bool allow_multi_user_threads; bool have_hidden_threads; ompi_proc_t** procs; + ompi_rte_process_status_t my_status; size_t nprocs; - char *error = NULL; - char *contact_info; + char *error = NULL, *segment; /* Become an OMPI process */ @@ -93,23 +96,16 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) goto error; } - /* - * Register my process info with my replica. Note that this must be done - * after the rte init is completed. - */ - contact_info = mca_oob_get_contact_info(); - ompi_rte_get_peers(NULL, &nprocs); - if (OMPI_SUCCESS != (ret = ompi_registry.rte_register(contact_info, nprocs, - ompi_rte_all_procs_registered, NULL, - ompi_rte_all_procs_unregistered, NULL))) { - error = "ompi_rte_init: failed in ompi_rte_register()\n"; - goto error; - } + /* start recording the compound command that starts us up */ + ompi_registry.begin_compound_cmd(); - /* wait for all procs to have registered so we can be sure to get everyone's contact info */ - if (OMPI_SUCCESS != (ret = ompi_rte_monitor_procs_registered())) { - error = "ompi_rte_init: failed to see all procs register\n"; - goto error; + /* + * Call back into OOB to allow do any final initialization + * (e.g. put contact info in register). 
+ */ + if (OMPI_SUCCESS != (ret = mca_oob_base_module_init())) { + ompi_output(0, "ompi_rte_init: failed in mca_oob_base_module_init()\n"); + return ret; } /* Once we've joined the RTE, see if any MCA parameters were @@ -252,6 +248,39 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) goto error; } + /* + * Set my process status to "starting". Note that this must be done + * after the rte init is completed. + * + * Ensure we own the job status segment first + */ + asprintf(&segment, "%s-%s", OMPI_RTE_JOB_STATUS_SEGMENT, + ompi_name_server.get_jobid_string(ompi_rte_get_self())); + ompi_registry.assume_ownership(segment); + + my_status.status_key = OMPI_PROC_STARTING; + my_status.exit_code = 0; + if (OMPI_SUCCESS != (ret = ompi_rte_set_process_status(&my_status, ompi_rte_get_self()))) { + error = "ompi_mpi_init: failed in ompi_rte_set_process_status()\n"; + goto error; + } + + /* execute the compound command - no return data requested + * we'll get it all from the startup message + */ + ompi_registry.exec_compound_cmd(OMPI_REGISTRY_NO_RETURN_REQUESTED); + + /* wait to receive startup message and info distributed */ + if (OMPI_SUCCESS != (ret = ompi_rte_wait_startup_msg())) { + error = "ompi_rte_init: failed to see all procs register\n"; + goto error; + } + + if (ompi_rte_debug_flag) { + ompi_output(0, "[%d,%d,%d] process startup message received", + OMPI_NAME_ARGS(*ompi_rte_get_self())); + } + /* add all ompi_proc_t's to PML */ if (NULL == (procs = ompi_proc_world(&nprocs))) { error = "ompi_proc_world() failed"; @@ -331,5 +360,11 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) ompi_mpi_initialized = true; ompi_mpi_finalized = false; + + if (ompi_rte_debug_flag) { + ompi_output(0, "[%d,%d,%d] ompi_mpi_init completed", + OMPI_NAME_ARGS(*ompi_rte_get_self())); + } + return MPI_SUCCESS; } diff --git a/src/runtime/Makefile.am b/src/runtime/Makefile.am index f143f98797..fd5b097436 100644 --- a/src/runtime/Makefile.am +++ 
b/src/runtime/Makefile.am @@ -28,6 +28,10 @@ libruntime_la_SOURCES = \ ompi_rte_init.c \ ompi_rte_llm.c \ ompi_rte_monitor.c \ + ompi_rte_job_startup.c \ + ompi_rte_job_shutdown.c \ + ompi_rte_wait_startup_shutdown_msg.c \ + ompi_rte_process_status.c \ ompi_rte_cmd_line_setup.c \ ompi_vm_register.c \ universe_exists.c \ diff --git a/src/runtime/ompi_rte_init.c b/src/runtime/ompi_rte_init.c index 55058f7334..f5b82d6756 100644 --- a/src/runtime/ompi_rte_init.c +++ b/src/runtime/ompi_rte_init.c @@ -389,15 +389,6 @@ int ompi_rte_init(ompi_cmd_line_t *cmd_line, bool *allow_multi_user_threads, boo exit(-1); } - /* - * Call back into OOB to allow do any final initialization - * (e.g. put contact info in register). - */ - if (OMPI_SUCCESS != (ret = mca_oob_base_module_init())) { - ompi_output(0, "ompi_rte_init: failed in mca_oob_base_module_init()\n"); - return ret; - } - /* * All done */ @@ -503,26 +494,6 @@ ompi_rte_int_valuepair_destruct(ompi_object_t *obj) if (NULL != valpair->value) free(valpair->value); } -/** constructor for \c ompi_startup_shutdown_message_t */ -static -void -ompi_startup_shutdown_message_construct(ompi_startup_shutdown_message_t *msg) -{ - msg->msg = NULL; -} - - -/** destructor for \c ompi_startup_shutdown_message_t */ -static -void -ompi_startup_shutdown_message_destruct(ompi_startup_shutdown_message_t *msg) -{ - if (NULL != msg->msg) { - OBJ_RELEASE(msg->msg); - } -} - - /** create instance information for \c ompi_rte_node_schedule_t */ OBJ_CLASS_INSTANCE(ompi_rte_node_schedule_t, ompi_list_item_t, ompi_rte_int_node_schedule_construct, @@ -538,7 +509,3 @@ OBJ_CLASS_INSTANCE(ompi_rte_valuepair_t, ompi_list_item_t, /** create instance information for \c ompi_rte_node_allocation_data_t */ OBJ_CLASS_INSTANCE(ompi_rte_node_allocation_data_t, ompi_object_t, NULL, NULL); -/** create instance information for \c ompi_startup_shutdown_message_t */ -OBJ_CLASS_INSTANCE(ompi_startup_shutdown_message_t, ompi_list_item_t, - 
ompi_startup_shutdown_message_construct, - ompi_startup_shutdown_message_destruct); diff --git a/src/runtime/ompi_rte_job_shutdown.c b/src/runtime/ompi_rte_job_shutdown.c new file mode 100644 index 0000000000..2167a72931 --- /dev/null +++ b/src/runtime/ompi_rte_job_shutdown.c @@ -0,0 +1,45 @@ +/* + * $HEADER$ + */ +/** @file: + * + * The Open MPI general purpose registry - support functions. + * + */ + +/* + * includes + */ + +#include "ompi_config.h" + +#include "mca/oob/oob.h" +#include "mca/oob/base/base.h" +#include "mca/ns/base/base.h" + +#include "runtime/runtime.h" + +int ompi_rte_job_shutdown(mca_ns_base_jobid_t jobid) +{ + ompi_list_t *recipients; + ompi_buffer_t shutdown_msg; + int return_code; + + recipients = OBJ_NEW(ompi_list_t); + + shutdown_msg = ompi_registry.get_shutdown_msg(jobid, recipients); + ompi_registry.triggers_inactive(jobid); + + /* check to ensure there are recipients on list - error if not */ + if (0 < ompi_list_get_size(recipients)) { + mca_oob_xcast(ompi_rte_get_self(), recipients, shutdown_msg, NULL); + return_code = OMPI_SUCCESS; + } else { + return_code = OMPI_ERROR; + } + + ompi_registry.cleanup_job(jobid); + OBJ_RELEASE(recipients); + + return return_code; +} diff --git a/src/runtime/ompi_rte_job_startup.c b/src/runtime/ompi_rte_job_startup.c new file mode 100644 index 0000000000..8ba96c511b --- /dev/null +++ b/src/runtime/ompi_rte_job_startup.c @@ -0,0 +1,65 @@ +/* + * $HEADER$ + */ +/** @file: + * + * + * + */ + +/* + * includes + */ + +#include "ompi_config.h" + +#include "mca/oob/oob.h" +#include "mca/oob/base/base.h" +#include "mca/ns/base/base.h" + +#include "runtime/runtime.h" + + +int ompi_rte_job_startup(mca_ns_base_jobid_t jobid) +{ + ompi_list_t *recipients; + ompi_buffer_t startup_msg; + ompi_name_server_namelist_t *ptr; + ompi_rte_process_status_t proc_status; + int num_procs; + + if (ompi_rte_debug_flag) { + ompi_output(0, "[%d,%d,%d] entered rte_job_startup", + OMPI_NAME_ARGS(*ompi_rte_get_self())); + } + + 
recipients = OBJ_NEW(ompi_list_t); + + startup_msg = ompi_registry.get_startup_msg(jobid, recipients); + ompi_registry.triggers_active(jobid); + + if (ompi_rte_debug_flag) { + ompi_output(0, "[%d,%d,%d] rte_job_startup: sending startup message to %d recipients", + OMPI_NAME_ARGS(*ompi_rte_get_self()), + ompi_list_get_size(recipients)); + } + + /* check to ensure there are recipients on list - don't send if not */ + if (0 < (num_procs = (int)ompi_list_get_size(recipients))) { + mca_oob_xcast(ompi_rte_get_self(), recipients, startup_msg, NULL); + + /* for each recipient, set process status to "running" */ + proc_status.status_key = OMPI_PROC_RUNNING; + proc_status.exit_code = 0; + while (NULL != (ptr = (ompi_name_server_namelist_t*)ompi_list_remove_first(recipients))) { + ompi_rte_set_process_status(&proc_status, ptr->name); + } + } + + + OBJ_RELEASE(recipients); + + /* return number of processes started = number of recipients */ + return num_procs; + +} diff --git a/src/runtime/ompi_rte_monitor.c b/src/runtime/ompi_rte_monitor.c index 8ef78ed93b..b1b31043dc 100644 --- a/src/runtime/ompi_rte_monitor.c +++ b/src/runtime/ompi_rte_monitor.c @@ -37,6 +37,11 @@ static bool ompi_rte_waiting = false; void ompi_rte_all_procs_registered(ompi_registry_notify_message_t* match, void* cbdata) { + if (ompi_rte_debug_flag) { + ompi_output(0, "[%d,%d,%d] all procs registered", + OMPI_NAME_ARGS(*ompi_rte_get_self())); + } + OMPI_THREAD_LOCK(&ompi_rte_mutex); ompi_rte_job_started = true; if (ompi_rte_waiting) { diff --git a/src/runtime/ompi_rte_parse_cmd_line.c b/src/runtime/ompi_rte_parse_cmd_line.c index 18b2acb56d..b395dfac28 100644 --- a/src/runtime/ompi_rte_parse_cmd_line.c +++ b/src/runtime/ompi_rte_parse_cmd_line.c @@ -122,7 +122,7 @@ void ompi_rte_parse_cmd_line(ompi_cmd_line_t *cmd_line) } ompi_universe_info.ns_replica = strdup(nsreplica); if (NULL == ompi_process_info.ns_replica) { - ompi_process_info.ns_replica = ns_base_create_process_name(0,0,0); + 
ompi_process_info.ns_replica = mca_ns_base_create_process_name(0,0,0); } mca_oob_parse_contact_info(ompi_universe_info.ns_replica, ompi_process_info.ns_replica, NULL); @@ -142,7 +142,7 @@ void ompi_rte_parse_cmd_line(ompi_cmd_line_t *cmd_line) } ompi_universe_info.gpr_replica = strdup(nsreplica); if (NULL == ompi_process_info.gpr_replica) { - ompi_process_info.gpr_replica = ns_base_create_process_name(0,0,0); + ompi_process_info.gpr_replica = mca_ns_base_create_process_name(0,0,0); } mca_oob_parse_contact_info(ompi_universe_info.gpr_replica, ompi_process_info.gpr_replica, NULL); diff --git a/src/runtime/ompi_rte_parse_environ.c b/src/runtime/ompi_rte_parse_environ.c index 4ac53bb238..6ada685f2d 100644 --- a/src/runtime/ompi_rte_parse_environ.c +++ b/src/runtime/ompi_rte_parse_environ.c @@ -62,7 +62,7 @@ void ompi_rte_parse_environ(void) mca_base_param_lookup_string(id, &ompi_universe_info.gpr_replica); if (NULL != ompi_universe_info.gpr_replica) { mca_oob_set_contact_info(ompi_universe_info.gpr_replica); - ompi_process_info.gpr_replica = ns_base_create_process_name(0,0,0); + ompi_process_info.gpr_replica = mca_ns_base_create_process_name(0,0,0); mca_oob_parse_contact_info(ompi_universe_info.gpr_replica, ompi_process_info.gpr_replica, NULL); } else { @@ -76,7 +76,7 @@ void ompi_rte_parse_environ(void) mca_base_param_lookup_string(id, &ompi_universe_info.ns_replica); if (NULL != ompi_universe_info.ns_replica) { mca_oob_set_contact_info(ompi_universe_info.ns_replica); - ompi_process_info.ns_replica = ns_base_create_process_name(0,0,0); + ompi_process_info.ns_replica = mca_ns_base_create_process_name(0,0,0); mca_oob_parse_contact_info(ompi_universe_info.ns_replica, ompi_process_info.ns_replica, NULL); } else { diff --git a/src/runtime/ompi_rte_process_status.c b/src/runtime/ompi_rte_process_status.c new file mode 100644 index 0000000000..fe71ac8887 --- /dev/null +++ b/src/runtime/ompi_rte_process_status.c @@ -0,0 +1,110 @@ +/* + * $HEADER$ + */ +/** @file: + * + * The 
Open MPI general purpose registry - implementation. + * + */ + +/* + * includes + */ + +#include "ompi_config.h" + +#include "util/proc_info.h" +#include "util/sys_info.h" + +#include "mca/gpr/base/base.h" + +#include "runtime/runtime.h" + +ompi_rte_process_status_t *ompi_rte_get_process_status(ompi_process_name_t *proc) +{ + char *segment, *tokens[2]; + ompi_registry_value_t *value; + ompi_rte_process_status_t *stat_ptr; + ompi_list_t *returned_list; + + /* setup tokens and segments for this job */ + asprintf(&segment, "%s-%s", OMPI_RTE_JOB_STATUS_SEGMENT, ompi_name_server.get_jobid_string(proc)); + + tokens[0] = ompi_name_server.get_proc_name_string(proc); + tokens[1] = NULL; + + returned_list = ompi_registry.get(OMPI_REGISTRY_XAND, segment, tokens); + + free(segment); + free(tokens[0]); + + if (NULL != (value = (ompi_registry_value_t*)ompi_list_remove_first(returned_list))) { + stat_ptr = ompi_rte_unpack_process_status(value); + + return stat_ptr; + } + + return NULL; +} + + +int ompi_rte_set_process_status(ompi_rte_process_status_t *status, + ompi_process_name_t *proc) +{ + char *segment; + char *tokens[2]; + void *addr; + int size; + ompi_buffer_t buffer; + + /* setup keys and segment for this job */ + asprintf(&segment, "%s-%s", OMPI_RTE_JOB_STATUS_SEGMENT, ompi_name_server.get_jobid_string(proc)); + tokens[0] = ompi_name_server.get_proc_name_string(proc); + tokens[1] = NULL; + + /* create the buffer to store the status information */ + ompi_buffer_init(&buffer, 0); + ompi_pack(buffer, &status->status_key, 1, MCA_GPR_OOB_PACK_STATUS_KEY); + ompi_pack(buffer, &status->exit_code, 1, MCA_GPR_OOB_PACK_EXIT_CODE); + + /* peek the buffer and resulting size */ + ompi_buffer_get(buffer, &addr, &size); + + ompi_registry.put(OMPI_REGISTRY_XAND | OMPI_REGISTRY_OVERWRITE, + segment, tokens, addr, size); + + if ((OMPI_PROC_STOPPED == status->status_key) || + (OMPI_PROC_KILLED == status->status_key)) { + ompi_registry.cleanup_process(true, proc); /* purge subscriptions */ 
+ } else if (OMPI_PROC_TERMINATING == status->status_key) { + ompi_registry.cleanup_process(false, proc); /* just cleanup - don't purge subs */ + } + + /* cleanup */ + free(tokens[0]); + free(segment); + ompi_buffer_free(buffer); + + return OMPI_SUCCESS; +} + + +ompi_rte_process_status_t +*ompi_rte_unpack_process_status(ompi_registry_value_t *value) +{ + ompi_buffer_t buffer; + ompi_rte_process_status_t *stat_ptr; + + stat_ptr = (ompi_rte_process_status_t*)malloc(sizeof(ompi_rte_process_status_t)); + + /* transfer ownership of registry object to buffer and unpack */ + ompi_buffer_init_preallocated(&buffer, value->object, value->object_size); + value->object = NULL; + value->object_size = 0; + OBJ_RELEASE(value); + + ompi_unpack(buffer, &stat_ptr->status_key, 1, MCA_GPR_OOB_PACK_STATUS_KEY); + ompi_unpack(buffer, &stat_ptr->exit_code, 1, MCA_GPR_OOB_PACK_EXIT_CODE); + + return stat_ptr; +} diff --git a/src/runtime/ompi_rte_wait_startup_shutdown_msg.c b/src/runtime/ompi_rte_wait_startup_shutdown_msg.c new file mode 100644 index 0000000000..581e7a891a --- /dev/null +++ b/src/runtime/ompi_rte_wait_startup_shutdown_msg.c @@ -0,0 +1,135 @@ +/* + * $HEADER$ + */ + +/** @file **/ + +#include "ompi_config.h" + +#include "runtime/runtime.h" +#include "mca/gpr/base/base.h" +#include "mca/oob/base/base.h" + +/* + * Local functions + */ +void +ompi_rte_decode_startup_msg(int status, ompi_process_name_t *peer, + ompi_buffer_t msg, int tag, void *cbdata); + +void +ompi_rte_decode_shutdown_msg(int status, ompi_process_name_t *peer, + ompi_buffer_t msg, int tag, void *cbdata); + +static void +ompi_rte_decode_startup_shutdown_msg(ompi_registry_notify_action_t state, + int status, ompi_process_name_t *peer, + ompi_buffer_t msg, int tag, void *cbdata); + +/* + * Main functions + */ +int ompi_rte_wait_startup_msg(void) +{ + + return mca_oob_xcast(NULL, NULL, NULL, ompi_rte_decode_startup_msg); +} + + +void +ompi_rte_decode_startup_msg(int status, ompi_process_name_t *peer, + 
ompi_buffer_t msg, int tag, void *cbdata) +{ + ompi_rte_decode_startup_shutdown_msg(OMPI_REGISTRY_NOTIFY_ON_STARTUP, + status, peer, msg, tag, cbdata); +} + + +int ompi_rte_wait_shutdown_msg(void) +{ + return mca_oob_xcast(NULL, NULL, NULL, ompi_rte_decode_shutdown_msg); +} + + +void +ompi_rte_decode_shutdown_msg(int status, ompi_process_name_t *peer, + ompi_buffer_t msg, int tag, void *cbdata) +{ + ompi_rte_decode_startup_shutdown_msg(OMPI_REGISTRY_NOTIFY_ON_SHUTDOWN, + status, peer, msg, tag, cbdata); +} + + +/* + * Unpack the startup/shutdown message. + * When a startup/shutdown message is received, it contains data objects from + * several pre-defined registry segments. This includes OOB contact info, + * PTL contact info, and other things. Each of these subsystems has a + * callback function that is used to receive updates from the registry + * This function deconstructs the message and builds a notify + * message for each segment, and then passes that message to the appropriate + * callback function as if it came directly from the registry. 
+ */ + +static void +ompi_rte_decode_startup_shutdown_msg(ompi_registry_notify_action_t state, + int status, ompi_process_name_t *peer, + ompi_buffer_t msg, int tag, void *cbdata) +{ + char *segment; + ompi_registry_notify_message_t *notify_msg; + ompi_registry_value_t *data_value; + ompi_registry_object_t *data_object; + ompi_registry_object_size_t data_obj_size; + int32_t num_objects, i; + + if (ompi_rte_debug_flag) { + if (OMPI_REGISTRY_NOTIFY_ON_STARTUP == state) { + ompi_output(0, "[%d,%d,%d] decoding startup msg", + OMPI_NAME_ARGS(*ompi_rte_get_self())); + } else { + ompi_output(0, "[%d,%d,%d] decoding shutdown msg", + OMPI_NAME_ARGS(*ompi_rte_get_self())); + } + } + + if (OMPI_REGISTRY_NOTIFY_ON_SHUTDOWN == state) { + ompi_buffer_free(msg); + return; + } + + while (0 < ompi_unpack_string(msg, &segment)) { + if (ompi_rte_debug_flag) { + ompi_output(0, "[%d,%d,%d] decoding msg for segment %s", + OMPI_NAME_ARGS(*ompi_rte_get_self()), segment); + } + + ompi_unpack(msg, &num_objects, 1, OMPI_INT32); /* unpack #data objects */ + + if (0 < num_objects) { + notify_msg = OBJ_NEW(ompi_registry_notify_message_t); + notify_msg->segment = strdup(segment); + + for (i=0; i < num_objects; i++) { + + data_value = OBJ_NEW(ompi_registry_value_t); + ompi_unpack(msg, &data_obj_size, 1, MCA_GPR_OOB_PACK_OBJECT_SIZE); + data_object = (ompi_registry_object_t)malloc(data_obj_size); + ompi_unpack(msg, data_object, data_obj_size, OMPI_BYTE); + data_value->object = data_object; + data_value->object_size = data_obj_size; + + ompi_list_append(&notify_msg->data, &data_value->item); + } + + if (ompi_rte_debug_flag) { + ompi_output(0, "[%d,%d,%d] delivering msg for segment %s with %d data objects", + OMPI_NAME_ARGS(*ompi_rte_get_self()), segment, (int)num_objects); + } + + ompi_registry.deliver_notify_msg(state, notify_msg); + } + + free(segment); + } +} diff --git a/src/runtime/ompi_vm_register.c b/src/runtime/ompi_vm_register.c index bad5b76a39..82f7b5ab18 100644 --- 
a/src/runtime/ompi_vm_register.c +++ b/src/runtime/ompi_vm_register.c @@ -65,7 +65,7 @@ int ompi_vm_register(void) keys[0] = ompi_name_server.get_proc_name_string(ompi_rte_get_self()); keys[1] = NULL; - ret_code = ompi_registry.put(OMPI_REGISTRY_XAND, "ompi-vm", keys, buffer, sizeof(buffer)); + ret_code = ompi_registry.put(OMPI_REGISTRY_XAND, OMPI_RTE_VM_STATUS_SEGMENT, keys, buffer, sizeof(buffer)); error: ompi_buffer_free(buffer); diff --git a/src/runtime/runtime.h b/src/runtime/runtime.h index 89e8f5c66a..6f2953cef3 100644 --- a/src/runtime/runtime.h +++ b/src/runtime/runtime.h @@ -16,7 +16,7 @@ #include #endif -#include "mca/gpr/base/base.h" +#include "mca/gpr/gpr.h" #include "util/cmd_line.h" #include "runtime/runtime_types.h" @@ -26,6 +26,15 @@ mpiruntime/mpiruntime.h directly */ #include "mpi/runtime/mpiruntime.h" +/* constants defining runtime-related segment naming conventions for the + * registry + */ +#define OMPI_RTE_JOB_STATUS_SEGMENT "ompi-job-status" +#define OMPI_RTE_OOB_SEGMENT "ompi-oob" +#define OMPI_RTE_VM_STATUS_SEGMENT "ompi-vm-status" +#define OMPI_RTE_SCHED_SEGMENT "ompi-sched" +#define OMPI_RTE_MODEX_SEGMENT "ompi_modex" + /* constants for spawn constraints */ /** Spawn constraint - require multi-cell support. The selected spawn @@ -81,17 +90,26 @@ extern "C" { OMPI_DECLSPEC extern ompi_universe_t ompi_universe_info; - /* Define the startup/shutdown xcast message format for sending information - * from compound registry commands at the beginning and end of processes. 
- */ - struct ompi_startup_shutdown_message_t { - ompi_list_item_t item; - ompi_registry_notify_message_t *msg; + struct ompi_rte_process_status_t { + ompi_status_key_t status_key; + ompi_exit_code_t exit_code; }; - typedef struct ompi_startup_shutdown_message_t ompi_startup_shutdown_message_t; + typedef struct ompi_rte_process_status_t ompi_rte_process_status_t; - OBJ_CLASS_DECLARATION(ompi_startup_shutdown_message_t); + + struct ompi_rte_vm_status_t { + char *nodename; + ompi_list_t processes; + }; + typedef struct ompi_rte_vm_status_t ompi_rte_vm_status_t; + + struct ompi_rte_vm_process_t { + ompi_list_item_t *item; + ompi_process_name_t *name; + int32_t local_pid; + }; + typedef struct ompi_rte_vm_process_t ompi_rte_vm_process_t; /** * Initialize the Open MPI support code @@ -271,32 +289,45 @@ OMPI_DECLSPEC ompi_process_name_t* ompi_rte_get_self(void); OMPI_DECLSPEC int ompi_rte_get_peers(ompi_process_name_t **peers, size_t *npeers); /** - * "Hold" until all procs registered, or timeout occurs + * Get current status of the process */ - -OMPI_DECLSPEC int ompi_rte_monitor_procs_registered(void); +OMPI_DECLSPEC ompi_rte_process_status_t *ompi_rte_get_process_status(ompi_process_name_t *proc); /** - * "Hold" until all procs unregistered - no timeout. + * Set process status */ - -OMPI_DECLSPEC int ompi_rte_monitor_procs_unregistered(void); + +OMPI_DECLSPEC int ompi_rte_set_process_status(ompi_rte_process_status_t *status, + ompi_process_name_t *proc); /** - * Callback function for all procs registered + * Unpack the process status structure stored on the registry */ +OMPI_DECLSPEC ompi_rte_process_status_t *ompi_rte_unpack_process_status(ompi_registry_value_t *value); + + /** + * Hold for startup message to arrive, then decode it. + */ + +OMPI_DECLSPEC int ompi_rte_wait_startup_msg(void); + + /** + * Hold for shutdown message to arrive, then decode it. 
+ */ + +OMPI_DECLSPEC int ompi_rte_wait_shutdown_msg(void); + + /** + * Change state as processes complete registration/unregistration + */ + OMPI_DECLSPEC void ompi_rte_all_procs_registered(ompi_registry_notify_message_t* match, void* cbdata); - /** - * Callback function for all procs unregistered - */ OMPI_DECLSPEC void ompi_rte_all_procs_unregistered(ompi_registry_notify_message_t* match, void* cbdata); - /** - * Remove process registration. - */ +OMPI_DECLSPEC int ompi_rte_monitor_procs_registered(void); -OMPI_DECLSPEC int ompi_rte_unregister(void); +OMPI_DECLSPEC int ompi_rte_monitor_procs_unregistered(void); /** * Kill a specific process in this cell @@ -421,6 +452,15 @@ OMPI_DECLSPEC void ompi_rte_parse_environ(void); */ OMPI_DECLSPEC int ompi_vm_register(void); + /** + * Startup a job - notify processes that all ready to begin + */ +OMPI_DECLSPEC int ompi_rte_job_startup(mca_ns_base_jobid_t jobid); + + /** + * Shutdown a job - notify processes that all ready to stop + */ +OMPI_DECLSPEC int ompi_rte_job_shutdown(mca_ns_base_jobid_t jobid); #if defined(c_plusplus) || defined(__cplusplus) } diff --git a/src/runtime/runtime_types.h b/src/runtime/runtime_types.h index 32731f2379..9d61020fd1 100644 --- a/src/runtime/runtime_types.h +++ b/src/runtime/runtime_types.h @@ -25,6 +25,15 @@ #if defined(c_plusplus) || defined(__cplusplus) extern "C" { #endif + +/* + * define size of exit codes - should be moved to status monitor framework + * when that becomes available + */ + typedef int8_t ompi_exit_code_t; + typedef int8_t ompi_status_key_t; + + /** * Spawn Handle * diff --git a/src/runtime/universe_exists.c b/src/runtime/universe_exists.c index bf82c7dc46..f958cf4efc 100644 --- a/src/runtime/universe_exists.c +++ b/src/runtime/universe_exists.c @@ -190,13 +190,13 @@ int ompi_rte_universe_exists() free(ompi_process_info.ns_replica); ompi_process_info.ns_replica = NULL; } - ompi_process_info.ns_replica = ns_base_copy_process_name(&proc); + ompi_process_info.ns_replica 
= mca_ns_base_copy_process_name(&proc); if (NULL != ompi_process_info.gpr_replica) { free(ompi_process_info.gpr_replica); ompi_process_info.gpr_replica = NULL; } - ompi_process_info.gpr_replica = ns_base_copy_process_name(&proc); + ompi_process_info.gpr_replica = mca_ns_base_copy_process_name(&proc); if (NULL != ompi_universe_info.ns_replica) { free(ompi_universe_info.ns_replica); diff --git a/src/tools/console/ompiconsole.c b/src/tools/console/ompiconsole.c index 01aa4c237f..3b11e46dd3 100644 --- a/src/tools/console/ompiconsole.c +++ b/src/tools/console/ompiconsole.c @@ -144,7 +144,7 @@ int main(int argc, char *argv[]) } } else if (0 == strncmp(usercmd, "dumpvm", strlen("dumpvm"))) { fprintf(stderr, "getting vm list\n"); - list = ompi_registry.get(OMPI_REGISTRY_OR, "ompi-vm", NULL); + list = ompi_registry.get(OMPI_REGISTRY_OR, OMPI_RTE_VM_STATUS_SEGMENT, NULL); fprintf(stderr, "got vm list: length %d\n", (int)ompi_list_get_size(list)); for (item = ompi_list_get_first(list); item != ompi_list_get_end(list); diff --git a/src/tools/mpirun/mpirun.c b/src/tools/mpirun/mpirun.c index 7293f69294..47d9f467ea 100644 --- a/src/tools/mpirun/mpirun.c +++ b/src/tools/mpirun/mpirun.c @@ -30,6 +30,7 @@ #include "mca/base/base.h" #include "mca/ns/ns.h" #include "mca/ns/base/base.h" +#include "mca/gpr/base/base.h" #include "mca/pcm/base/base.h" #include "mca/oob/base/base.h" @@ -49,12 +50,13 @@ main(int argc, char *argv[]) ompi_list_t *nodelist = NULL; ompi_list_t schedlist; mca_ns_base_jobid_t new_jobid; - int num_procs = 1, rc; + int num_procs = 1; ompi_rte_node_schedule_t *sched; char cwd[MAXPATHLEN]; char *my_contact_info, *tmp; char *contact_file, *filenm, *segment; ompi_rte_spawn_handle_t *spawn_handle; + ompi_registry_notify_id_t rc_tag; /* * Intialize our Open MPI environment @@ -270,25 +272,26 @@ main(int argc, char *argv[]) * register to monitor the startup and shutdown processes */ /* setup segment for this job */ - asprintf(&segment, "ompi-job-%X", new_jobid); + 
asprintf(&segment, "%s-%s", OMPI_RTE_JOB_STATUS_SEGMENT, + ompi_name_server.convert_jobid_to_string(new_jobid)); /* register a synchro on the segment so we get notified when everyone registers */ - rc = ompi_registry.synchro( - OMPI_REGISTRY_SYNCHRO_MODE_LEVEL|OMPI_REGISTRY_SYNCHRO_MODE_ONE_SHOT, - OMPI_REGISTRY_OR, - segment, - NULL, - num_procs, - ompi_rte_all_procs_registered, NULL); + rc_tag = ompi_registry.synchro( + OMPI_REGISTRY_SYNCHRO_MODE_LEVEL|OMPI_REGISTRY_SYNCHRO_MODE_ONE_SHOT, + OMPI_REGISTRY_OR, + segment, + NULL, + num_procs, + ompi_rte_all_procs_registered, NULL); /* register a synchro on the segment so we get notified when everyone is gone */ - rc = ompi_registry.synchro( - OMPI_REGISTRY_SYNCHRO_MODE_DESCENDING|OMPI_REGISTRY_SYNCHRO_MODE_ONE_SHOT, - OMPI_REGISTRY_OR, - segment, - NULL, - 0, - ompi_rte_all_procs_unregistered, NULL); + rc_tag = ompi_registry.synchro( + OMPI_REGISTRY_SYNCHRO_MODE_DESCENDING|OMPI_REGISTRY_SYNCHRO_MODE_ONE_SHOT, + OMPI_REGISTRY_OR, + segment, + NULL, + 0, + ompi_rte_all_procs_unregistered, NULL); /* * spawn procs @@ -303,8 +306,11 @@ main(int argc, char *argv[]) if (OMPI_SUCCESS != (ret = ompi_rte_monitor_procs_registered())) { ompi_show_help("help-mpirun.txt", "mpirun:proc-reg-failed", true, argv[0], ret); + ompi_rte_job_shutdown(new_jobid); } else { + ompi_rte_job_startup(new_jobid); ompi_rte_monitor_procs_unregistered(); + ompi_rte_job_shutdown(new_jobid); } /* * - ompi_rte_kill_job() diff --git a/src/tools/ompid/ompid.c b/src/tools/ompid/ompid.c index 6011be57a7..a71b5dc361 100644 --- a/src/tools/ompid/ompid.c +++ b/src/tools/ompid/ompid.c @@ -157,24 +157,24 @@ int main(int argc, char *argv[]) return ret; } - /* - * Register my process info with my replica. Note that this must be done - * after the rte init is completed. 
- */ - contact_info = mca_oob_get_contact_info(); - ompi_rte_get_peers(NULL, &nprocs); - if (OMPI_SUCCESS != (ret = ompi_registry.rte_register(contact_info, nprocs, - ompi_rte_all_procs_registered, NULL, - ompi_rte_all_procs_unregistered, NULL))) { - ompi_output(0, "ompi_rte_init: failed in ompi_rte_register");; - return ret; - } +/* /\* */ +/* * Register my process info with my replica. Note that this must be done */ +/* * after the rte init is completed. */ +/* *\/ */ +/* contact_info = mca_oob_get_contact_info(); */ +/* ompi_rte_get_peers(NULL, &nprocs); */ +/* if (OMPI_SUCCESS != (ret = ompi_rte_register(contact_info, nprocs, */ +/* ompi_rte_all_procs_registered, NULL, */ +/* ompi_rte_all_procs_unregistered, NULL))) { */ +/* ompi_output(0, "ompi_rte_init: failed in ompi_rte_register");; */ +/* return ret; */ +/* } */ - /* wait for all the daemons to have registered so we can be sure to get everyone's contact info */ - if (OMPI_SUCCESS != (ret = ompi_rte_monitor_procs_registered())) { - ompi_output(0, "ompi_rte_init: failed to see all procs register"); - return ret; - } +/* /\* wait for all the daemons to have registered so we can be sure to get everyone's contact info *\/ */ +/* if (OMPI_SUCCESS != (ret = ompi_rte_monitor_procs_registered())) { */ +/* ompi_output(0, "ompi_rte_init: failed to see all procs register"); */ +/* return ret; */ +/* } */ /* if i'm the seed, get my contact info and write my setup file for others to find */ if (ompi_process_info.seed) { diff --git a/src/util/bufpack.c b/src/util/bufpack.c index 1b9500dce5..2be1981aac 100644 --- a/src/util/bufpack.c +++ b/src/util/bufpack.c @@ -372,8 +372,6 @@ return (OMPI_SUCCESS); uint32_t * d32; uint16_t * s16; uint32_t * s32; - ompi_process_name_t *dn; - ompi_process_name_t *sn; /* first find the destination location in the buffer */ if (!buffer) { return (OMPI_ERROR); } @@ -386,6 +384,7 @@ return (OMPI_SUCCESS); /* calculate op_size data size */ switch(type) { case OMPI_BYTE: + case OMPI_INT8: 
op_size = n; break; case OMPI_STRING: @@ -411,11 +410,14 @@ return (OMPI_SUCCESS); case OMPI_INT32: op_size = n*sizeof(uint32_t); break; + case OMPI_JOBID: + op_size = n*sizeof(mca_ns_base_jobid_t); + break; case OMPI_NAME: - op_size = n*sizeof(ompi_process_name_t); - break; + op_size = n*sizeof(ompi_process_name_t); + break; default: - return OMPI_ERROR; + return OMPI_ERROR; } if (op_size > bptr->space) { /* need to expand the buffer */ @@ -429,6 +431,7 @@ return (OMPI_SUCCESS); switch(type) { case OMPI_BYTE: + case OMPI_INT8: memcpy(dest, src, n); break; case OMPI_PACKED: @@ -454,15 +457,11 @@ return (OMPI_SUCCESS); strncpy((char*) dest, (char*) src, n); *((char *) dest + n - 1) = '\0'; break; + case OMPI_JOBID: + mca_ns_base_pack_jobid(dest, src, n); + break; case OMPI_NAME: - dn = (ompi_process_name_t*) dest; - sn = (ompi_process_name_t*) src; - for (i=0; icellid = htonl(sn->cellid); - dn->jobid = htonl(sn->jobid); - dn->vpid = htonl(sn->vpid); - dn++; sn++; - } + mca_ns_base_pack_name(dest, src, n); break; default: return OMPI_ERROR; @@ -501,8 +500,6 @@ ompi_unpack(ompi_buffer_t buffer, void * dest, size_t n, ompi_pack_type_t type) uint32_t * d32; uint16_t * s16; uint32_t * s32; - ompi_process_name_t *dn; - ompi_process_name_t *sn; /* first find the source location in the buffer */ if (!buffer) { return (OMPI_ERROR); } @@ -515,6 +512,7 @@ ompi_unpack(ompi_buffer_t buffer, void * dest, size_t n, ompi_pack_type_t type) switch(type) { case OMPI_BYTE: case OMPI_STRING: + case OMPI_INT8: op_size = n; break; case OMPI_PACKED: @@ -527,9 +525,12 @@ ompi_unpack(ompi_buffer_t buffer, void * dest, size_t n, ompi_pack_type_t type) case OMPI_INT32: op_size = n*sizeof(uint32_t); break; + case OMPI_JOBID: + op_size = n*sizeof(mca_ns_base_jobid_t); + break; case OMPI_NAME: - op_size = n*sizeof(ompi_process_name_t); - break; + op_size = n*sizeof(ompi_process_name_t); + break; default: return OMPI_ERROR; } @@ -547,8 +548,9 @@ ompi_unpack(ompi_buffer_t buffer, void * dest, 
size_t n, ompi_pack_type_t type) switch(type) { case OMPI_BYTE: + case OMPI_INT8: memcpy(dest, src, n); - break; + break; case OMPI_PACKED: return OMPI_ERROR; case OMPI_INT16: @@ -571,15 +573,11 @@ ompi_unpack(ompi_buffer_t buffer, void * dest, size_t n, ompi_pack_type_t type) strncpy((char*) dest, (char*) src, n); *((char *) dest + n - 1) = '\0'; break; + case OMPI_JOBID: + mca_ns_base_unpack_jobid(dest, src, n); + break; case OMPI_NAME: - dn = (ompi_process_name_t*) dest; - sn = (ompi_process_name_t*) src; - for (i=0; icellid = ntohl(sn->cellid); - dn->jobid = ntohl(sn->jobid); - dn->vpid = ntohl(sn->vpid); - dn++; sn++; - } + mca_ns_base_unpack_name(dest, src, n); break; default: return OMPI_ERROR; diff --git a/src/util/bufpack.h b/src/util/bufpack.h index d82c77c15c..29f04d4ba1 100644 --- a/src/util/bufpack.h +++ b/src/util/bufpack.h @@ -20,6 +20,7 @@ #define _OMPI_PACK_H_ #include "ompi_config.h" +#include "mca/ns/base/base.h" /* * Other constants */ @@ -32,10 +33,12 @@ typedef enum { OMPI_BYTE, /**< a byte of data */ + OMPI_INT8, /**< an 8-bit integer */ OMPI_INT16, /**< a 16 bit integer */ OMPI_INT32, /**< a 32 bit integer */ OMPI_STRING, /**< a NULL terminated string */ OMPI_NAME, /**< an ompi_process_name_t */ + OMPI_JOBID, /**< a jobid */ OMPI_PACKED /**< already packed data. */ } ompi_pack_type_t;