From 1454832bf70a78e7aa3f36c49db0d982a7ebd0a9 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Mon, 6 Dec 2004 16:50:45 +0000 Subject: [PATCH] Mostly just added some diagnostic messages to help chase down a problem in comm_spawn. Fixed an error in the gpr notification system - not sure if that totally fixes the problem, but definitely made progress on it. This commit was SVN r3709. --- src/mca/base/mca_base_module_exchange.c | 19 ++++++++++--------- src/mca/gpr/proxy/gpr_proxy_component.c | 16 +++++++++++++--- src/mca/gpr/proxy/gpr_proxy_subscribe.c | 6 ++++++ .../gpr_replica_internals_trigger_ops.c | 9 +++++++-- .../gpr/replica/gpr_replica_recv_proxy_msgs.c | 5 +++++ src/mca/gpr/replica/gpr_replica_xmit_alerts.c | 3 +-- 6 files changed, 42 insertions(+), 16 deletions(-) diff --git a/src/mca/base/mca_base_module_exchange.c b/src/mca/base/mca_base_module_exchange.c index 2bef97f96d..1204c12172 100644 --- a/src/mca/base/mca_base_module_exchange.c +++ b/src/mca/base/mca_base_module_exchange.c @@ -339,15 +339,16 @@ static int mca_base_modex_subscribe(ompi_process_name_t* name) /* otherwise - subscribe */ asprintf(&segment, "%s-%s", OMPI_RTE_MODEX_SEGMENT, mca_ns_base_get_jobid_string(name)); rctag = ompi_registry.subscribe( - OMPI_REGISTRY_OR, - OMPI_REGISTRY_NOTIFY_ADD_ENTRY|OMPI_REGISTRY_NOTIFY_DELETE_ENTRY| - OMPI_REGISTRY_NOTIFY_MODIFICATION| - OMPI_REGISTRY_NOTIFY_ON_STARTUP|OMPI_REGISTRY_NOTIFY_INCLUDE_STARTUP_DATA| - OMPI_REGISTRY_NOTIFY_ON_SHUTDOWN, - segment, - NULL, - mca_base_modex_registry_callback, - NULL); + OMPI_REGISTRY_OR, + OMPI_REGISTRY_NOTIFY_ADD_ENTRY|OMPI_REGISTRY_NOTIFY_DELETE_ENTRY| + OMPI_REGISTRY_NOTIFY_MODIFICATION| + OMPI_REGISTRY_NOTIFY_ON_STARTUP|OMPI_REGISTRY_NOTIFY_INCLUDE_STARTUP_DATA| + OMPI_REGISTRY_NOTIFY_PRE_EXISTING| + OMPI_REGISTRY_NOTIFY_ON_SHUTDOWN, + segment, + NULL, + mca_base_modex_registry_callback, + NULL); if(rctag == OMPI_REGISTRY_NOTIFY_ID_MAX) { ompi_output(0, "mca_base_modex_exchange: " "ompi_registry.subscribe failed with return code %d\n", (int)rctag); diff --git a/src/mca/gpr/proxy/gpr_proxy_component.c b/src/mca/gpr/proxy/gpr_proxy_component.c index ea0dd2c71b..7fe5d68856 100644 --- a/src/mca/gpr/proxy/gpr_proxy_component.c +++ b/src/mca/gpr/proxy/gpr_proxy_component.c @@ -249,14 +249,16 @@ void mca_gpr_proxy_notify_recv(int status, ompi_process_name_t* sender, char **tokptr; mca_gpr_cmd_flag_t command; uint32_t num_items; - uint32_t i, id_tag; + uint32_t i; + ompi_registry_notify_id_t id_tag; ompi_registry_value_t *regval; ompi_registry_notify_message_t *message; bool found; mca_gpr_proxy_notify_request_tracker_t *trackptr; if (mca_gpr_proxy_debug) { - ompi_output(0, "gpr proxy: received trigger message"); + ompi_output(0, "[%d,%d,%d] gpr proxy: received trigger message", + OMPI_NAME_ARGS(*ompi_rte_get_self())); } message = OBJ_NEW(ompi_registry_notify_message_t); @@ -275,9 +277,15 @@ void mca_gpr_proxy_notify_recv(int status, ompi_process_name_t* sender, } message->owning_job = (mca_ns_base_jobid_t)i; - if (OMPI_SUCCESS != ompi_unpack(buffer, &id_tag, 1, OMPI_INT32)) { + if (OMPI_SUCCESS != ompi_unpack(buffer, &i, 1, OMPI_INT32)) { goto RETURN_ERROR; } + id_tag = (ompi_registry_notify_id_t)i; + + if (mca_gpr_proxy_debug) { + ompi_output(0, "[%d,%d,%d] trigger from segment %s id %d", + OMPI_NAME_ARGS(*ompi_rte_get_self()), message->segment, (int)id_tag); + } if (OMPI_SUCCESS != ompi_unpack(buffer, &message->trig_action, 1, MCA_GPR_OOB_PACK_ACTION)) { goto RETURN_ERROR; @@ -330,6 +338,8 @@ void mca_gpr_proxy_notify_recv(int status, ompi_process_name_t* sender, for (trackptr = (mca_gpr_proxy_notify_request_tracker_t*)ompi_list_get_first(&mca_gpr_proxy_notify_request_tracker); trackptr != (mca_gpr_proxy_notify_request_tracker_t*)ompi_list_get_end(&mca_gpr_proxy_notify_request_tracker) && !found; trackptr = (mca_gpr_proxy_notify_request_tracker_t*)ompi_list_get_next(trackptr)) { + ompi_output(0, "\tchecking trigger %d for segment %s\n", trackptr->local_idtag, + trackptr->segment); if (trackptr->local_idtag == id_tag) { found = true; } diff --git a/src/mca/gpr/proxy/gpr_proxy_subscribe.c b/src/mca/gpr/proxy/gpr_proxy_subscribe.c index 6942204830..2e17149f71 100644 --- a/src/mca/gpr/proxy/gpr_proxy_subscribe.c +++ b/src/mca/gpr/proxy/gpr_proxy_subscribe.c @@ -84,6 +84,12 @@ mca_gpr_proxy_subscribe(ompi_registry_mode_t mode, goto CLEANUP; } + if (mca_gpr_proxy_debug) { + ompi_output(0, "[%d,%d,%d] gpr proxy subscribe: subscribing to segment %s local idtag %d", + OMPI_NAME_ARGS(*ompi_rte_get_self()), segment, (int)idtag); + } + + if (0 > mca_oob_send_packed(mca_gpr_my_replica, cmd, MCA_OOB_TAG_GPR, 0)) { goto CLEANUP; } diff --git a/src/mca/gpr/replica/gpr_replica_internals_trigger_ops.c b/src/mca/gpr/replica/gpr_replica_internals_trigger_ops.c index 7fb9e6c453..e14492fea1 100644 --- a/src/mca/gpr/replica/gpr_replica_internals_trigger_ops.c +++ b/src/mca/gpr/replica/gpr_replica_internals_trigger_ops.c @@ -232,6 +232,12 @@ bool mca_gpr_replica_process_triggers(mca_gpr_replica_segment_t *seg, cb->user_tag = NULL; cb->message = message; cb->remote_idtag = trackptr->remote_idtag; + if (mca_gpr_replica_debug) { + ompi_output(0, "[%d,%d,%d] process_trig: queueing message for [%d,%d,%d] with idtag %d using remoteid %d\n", + OMPI_NAME_ARGS(*ompi_rte_get_self()), OMPI_NAME_ARGS(*(cb->requestor)), + (int)cb->remote_idtag, (int)trackptr->remote_idtag); + } + } ompi_list_append(&mca_gpr_replica_callbacks, &cb->item); @@ -316,8 +322,7 @@ mca_gpr_replica_enter_notify_request(mca_gpr_replica_segment_t *seg, trackptr->segptr = seg; trackptr->action = action; trackptr->requestor = ompi_name_server.copy_process_name(requestor); - trackptr->local_idtag = idtag; - trackptr->remote_idtag = OMPI_REGISTRY_NOTIFY_ID_MAX; + trackptr->remote_idtag = idtag; trackptr->callback = cb_func; trackptr->user_tag = user_tag; if (ompi_list_is_empty(&mca_gpr_replica_free_notify_id_tags)) { diff --git a/src/mca/gpr/replica/gpr_replica_recv_proxy_msgs.c b/src/mca/gpr/replica/gpr_replica_recv_proxy_msgs.c index ecad748f15..dc2402a90f 100644 --- a/src/mca/gpr/replica/gpr_replica_recv_proxy_msgs.c +++ b/src/mca/gpr/replica/gpr_replica_recv_proxy_msgs.c @@ -926,6 +926,11 @@ static ompi_registry_notify_id_t mca_gpr_replica_recv_subscribe_cmd(ompi_process if (NULL != sender) { /* remote sender */ + if (mca_gpr_replica_debug) { + ompi_output(0, "[%d,%d,%d] subscribe created for remote sender [%d,%d,%d] on segment %s for idtag %d", + OMPI_NAME_ARGS(*ompi_rte_get_self()), OMPI_NAME_ARGS(*sender), segment, id_tag); + } + /* enter request on local notify tracking system */ local_idtag1 = mca_gpr_replica_enter_notify_request(seg, action, sender, id_tag, NULL, NULL); diff --git a/src/mca/gpr/replica/gpr_replica_xmit_alerts.c b/src/mca/gpr/replica/gpr_replica_xmit_alerts.c index 5fdd324ea6..bee7dfe136 100644 --- a/src/mca/gpr/replica/gpr_replica_xmit_alerts.c +++ b/src/mca/gpr/replica/gpr_replica_xmit_alerts.c @@ -323,8 +323,7 @@ void mca_gpr_replica_remote_notify(ompi_process_name_t *recipient, int recipient if (OMPI_SUCCESS != ompi_pack(msg, regval->object, regval->object_size, OMPI_BYTE)) { return; } - /* TSW - should we add */ - /* OBJ_RELEASE(regval); */ + OBJ_RELEASE(regval); } } if (OMPI_SUCCESS != ompi_pack(msg, &message->num_tokens, 1, OMPI_INT32)) {