1
1

Mostly just added some diagnostic messages to help chase down a problem in comm_spawn. Also fixed an error in the GPR notification system; it is not yet clear whether that fully resolves the comm_spawn problem, but it definitely makes progress on it.

This commit was SVN r3709.
Этот коммит содержится в:
Ralph Castain 2004-12-06 16:50:45 +00:00
родитель 8304d0c5fa
Коммит 1454832bf7
6 изменённых файлов: 42 добавлений и 16 удалений

Просмотреть файл

@ -339,15 +339,16 @@ static int mca_base_modex_subscribe(ompi_process_name_t* name)
/* otherwise - subscribe */
asprintf(&segment, "%s-%s", OMPI_RTE_MODEX_SEGMENT, mca_ns_base_get_jobid_string(name));
rctag = ompi_registry.subscribe(
OMPI_REGISTRY_OR,
OMPI_REGISTRY_NOTIFY_ADD_ENTRY|OMPI_REGISTRY_NOTIFY_DELETE_ENTRY|
OMPI_REGISTRY_NOTIFY_MODIFICATION|
OMPI_REGISTRY_NOTIFY_ON_STARTUP|OMPI_REGISTRY_NOTIFY_INCLUDE_STARTUP_DATA|
OMPI_REGISTRY_NOTIFY_ON_SHUTDOWN,
segment,
NULL,
mca_base_modex_registry_callback,
NULL);
OMPI_REGISTRY_OR,
OMPI_REGISTRY_NOTIFY_ADD_ENTRY|OMPI_REGISTRY_NOTIFY_DELETE_ENTRY|
OMPI_REGISTRY_NOTIFY_MODIFICATION|
OMPI_REGISTRY_NOTIFY_ON_STARTUP|OMPI_REGISTRY_NOTIFY_INCLUDE_STARTUP_DATA|
OMPI_REGISTRY_NOTIFY_PRE_EXISTING|
OMPI_REGISTRY_NOTIFY_ON_SHUTDOWN,
segment,
NULL,
mca_base_modex_registry_callback,
NULL);
if(rctag == OMPI_REGISTRY_NOTIFY_ID_MAX) {
ompi_output(0, "mca_base_modex_exchange: "
"ompi_registry.subscribe failed with return code %d\n", (int)rctag);

Просмотреть файл

@ -249,14 +249,16 @@ void mca_gpr_proxy_notify_recv(int status, ompi_process_name_t* sender,
char **tokptr;
mca_gpr_cmd_flag_t command;
uint32_t num_items;
uint32_t i, id_tag;
uint32_t i;
ompi_registry_notify_id_t id_tag;
ompi_registry_value_t *regval;
ompi_registry_notify_message_t *message;
bool found;
mca_gpr_proxy_notify_request_tracker_t *trackptr;
if (mca_gpr_proxy_debug) {
ompi_output(0, "gpr proxy: received trigger message");
ompi_output(0, "[%d,%d,%d] gpr proxy: received trigger message",
OMPI_NAME_ARGS(*ompi_rte_get_self()));
}
message = OBJ_NEW(ompi_registry_notify_message_t);
@ -275,9 +277,15 @@ void mca_gpr_proxy_notify_recv(int status, ompi_process_name_t* sender,
}
message->owning_job = (mca_ns_base_jobid_t)i;
if (OMPI_SUCCESS != ompi_unpack(buffer, &id_tag, 1, OMPI_INT32)) {
if (OMPI_SUCCESS != ompi_unpack(buffer, &i, 1, OMPI_INT32)) {
goto RETURN_ERROR;
}
id_tag = (ompi_registry_notify_id_t)i;
if (mca_gpr_proxy_debug) {
ompi_output(0, "[%d,%d,%d] trigger from segment %s id %d",
OMPI_NAME_ARGS(*ompi_rte_get_self()), message->segment, (int)id_tag);
}
if (OMPI_SUCCESS != ompi_unpack(buffer, &message->trig_action, 1, MCA_GPR_OOB_PACK_ACTION)) {
goto RETURN_ERROR;
@ -330,6 +338,8 @@ void mca_gpr_proxy_notify_recv(int status, ompi_process_name_t* sender,
for (trackptr = (mca_gpr_proxy_notify_request_tracker_t*)ompi_list_get_first(&mca_gpr_proxy_notify_request_tracker);
trackptr != (mca_gpr_proxy_notify_request_tracker_t*)ompi_list_get_end(&mca_gpr_proxy_notify_request_tracker) && !found;
trackptr = (mca_gpr_proxy_notify_request_tracker_t*)ompi_list_get_next(trackptr)) {
ompi_output(0, "\tchecking trigger %d for segment %s\n", trackptr->local_idtag,
trackptr->segment);
if (trackptr->local_idtag == id_tag) {
found = true;
}

Просмотреть файл

@ -84,6 +84,12 @@ mca_gpr_proxy_subscribe(ompi_registry_mode_t mode,
goto CLEANUP;
}
if (mca_gpr_proxy_debug) {
ompi_output(0, "[%d,%d,%d] gpr proxy subscribe: subscribing to segment %s local idtag %d",
OMPI_NAME_ARGS(*ompi_rte_get_self()), segment, (int)idtag);
}
if (0 > mca_oob_send_packed(mca_gpr_my_replica, cmd, MCA_OOB_TAG_GPR, 0)) {
goto CLEANUP;
}

Просмотреть файл

@ -232,6 +232,12 @@ bool mca_gpr_replica_process_triggers(mca_gpr_replica_segment_t *seg,
cb->user_tag = NULL;
cb->message = message;
cb->remote_idtag = trackptr->remote_idtag;
if (mca_gpr_replica_debug) {
ompi_output(0, "[%d,%d,%d] process_trig: queueing message for [%d,%d,%d] with idtag %d using remoteid %d\n",
OMPI_NAME_ARGS(*ompi_rte_get_self()), OMPI_NAME_ARGS(*(cb->requestor)),
(int)cb->remote_idtag, (int)trackptr->remote_idtag);
}
}
ompi_list_append(&mca_gpr_replica_callbacks, &cb->item);
@ -316,8 +322,7 @@ mca_gpr_replica_enter_notify_request(mca_gpr_replica_segment_t *seg,
trackptr->segptr = seg;
trackptr->action = action;
trackptr->requestor = ompi_name_server.copy_process_name(requestor);
trackptr->local_idtag = idtag;
trackptr->remote_idtag = OMPI_REGISTRY_NOTIFY_ID_MAX;
trackptr->remote_idtag = idtag;
trackptr->callback = cb_func;
trackptr->user_tag = user_tag;
if (ompi_list_is_empty(&mca_gpr_replica_free_notify_id_tags)) {

Просмотреть файл

@ -926,6 +926,11 @@ static ompi_registry_notify_id_t mca_gpr_replica_recv_subscribe_cmd(ompi_process
if (NULL != sender) { /* remote sender */
if (mca_gpr_replica_debug) {
ompi_output(0, "[%d,%d,%d] subscribe created for remote sender [%d,%d,%d] on segment %s for idtag %d",
OMPI_NAME_ARGS(*ompi_rte_get_self()), OMPI_NAME_ARGS(*sender), segment, id_tag);
}
/* enter request on local notify tracking system */
local_idtag1 = mca_gpr_replica_enter_notify_request(seg, action, sender, id_tag, NULL, NULL);

Просмотреть файл

@ -323,8 +323,7 @@ void mca_gpr_replica_remote_notify(ompi_process_name_t *recipient, int recipient
if (OMPI_SUCCESS != ompi_pack(msg, regval->object, regval->object_size, OMPI_BYTE)) {
return;
}
/* TSW - should we add */
/* OBJ_RELEASE(regval); */
OBJ_RELEASE(regval);
}
}
if (OMPI_SUCCESS != ompi_pack(msg, &message->num_tokens, 1, OMPI_INT32)) {