Mostly added diagnostic messages to help chase down a problem in comm_spawn. Also fixed an error in the gpr notification system; it is not clear whether that fully fixes the problem, but it is definite progress.
This commit was SVN r3709.
This commit is contained in:
parent
8304d0c5fa
Commit
1454832bf7
@ -339,15 +339,16 @@ static int mca_base_modex_subscribe(ompi_process_name_t* name)
|
||||
/* otherwise - subscribe */
|
||||
asprintf(&segment, "%s-%s", OMPI_RTE_MODEX_SEGMENT, mca_ns_base_get_jobid_string(name));
|
||||
rctag = ompi_registry.subscribe(
|
||||
OMPI_REGISTRY_OR,
|
||||
OMPI_REGISTRY_NOTIFY_ADD_ENTRY|OMPI_REGISTRY_NOTIFY_DELETE_ENTRY|
|
||||
OMPI_REGISTRY_NOTIFY_MODIFICATION|
|
||||
OMPI_REGISTRY_NOTIFY_ON_STARTUP|OMPI_REGISTRY_NOTIFY_INCLUDE_STARTUP_DATA|
|
||||
OMPI_REGISTRY_NOTIFY_ON_SHUTDOWN,
|
||||
segment,
|
||||
NULL,
|
||||
mca_base_modex_registry_callback,
|
||||
NULL);
|
||||
OMPI_REGISTRY_OR,
|
||||
OMPI_REGISTRY_NOTIFY_ADD_ENTRY|OMPI_REGISTRY_NOTIFY_DELETE_ENTRY|
|
||||
OMPI_REGISTRY_NOTIFY_MODIFICATION|
|
||||
OMPI_REGISTRY_NOTIFY_ON_STARTUP|OMPI_REGISTRY_NOTIFY_INCLUDE_STARTUP_DATA|
|
||||
OMPI_REGISTRY_NOTIFY_PRE_EXISTING|
|
||||
OMPI_REGISTRY_NOTIFY_ON_SHUTDOWN,
|
||||
segment,
|
||||
NULL,
|
||||
mca_base_modex_registry_callback,
|
||||
NULL);
|
||||
if(rctag == OMPI_REGISTRY_NOTIFY_ID_MAX) {
|
||||
ompi_output(0, "mca_base_modex_exchange: "
|
||||
"ompi_registry.subscribe failed with return code %d\n", (int)rctag);
|
||||
|
@ -249,14 +249,16 @@ void mca_gpr_proxy_notify_recv(int status, ompi_process_name_t* sender,
|
||||
char **tokptr;
|
||||
mca_gpr_cmd_flag_t command;
|
||||
uint32_t num_items;
|
||||
uint32_t i, id_tag;
|
||||
uint32_t i;
|
||||
ompi_registry_notify_id_t id_tag;
|
||||
ompi_registry_value_t *regval;
|
||||
ompi_registry_notify_message_t *message;
|
||||
bool found;
|
||||
mca_gpr_proxy_notify_request_tracker_t *trackptr;
|
||||
|
||||
if (mca_gpr_proxy_debug) {
|
||||
ompi_output(0, "gpr proxy: received trigger message");
|
||||
ompi_output(0, "[%d,%d,%d] gpr proxy: received trigger message",
|
||||
OMPI_NAME_ARGS(*ompi_rte_get_self()));
|
||||
}
|
||||
|
||||
message = OBJ_NEW(ompi_registry_notify_message_t);
|
||||
@ -275,9 +277,15 @@ void mca_gpr_proxy_notify_recv(int status, ompi_process_name_t* sender,
|
||||
}
|
||||
message->owning_job = (mca_ns_base_jobid_t)i;
|
||||
|
||||
if (OMPI_SUCCESS != ompi_unpack(buffer, &id_tag, 1, OMPI_INT32)) {
|
||||
if (OMPI_SUCCESS != ompi_unpack(buffer, &i, 1, OMPI_INT32)) {
|
||||
goto RETURN_ERROR;
|
||||
}
|
||||
id_tag = (ompi_registry_notify_id_t)i;
|
||||
|
||||
if (mca_gpr_proxy_debug) {
|
||||
ompi_output(0, "[%d,%d,%d] trigger from segment %s id %d",
|
||||
OMPI_NAME_ARGS(*ompi_rte_get_self()), message->segment, (int)id_tag);
|
||||
}
|
||||
|
||||
if (OMPI_SUCCESS != ompi_unpack(buffer, &message->trig_action, 1, MCA_GPR_OOB_PACK_ACTION)) {
|
||||
goto RETURN_ERROR;
|
||||
@ -330,6 +338,8 @@ void mca_gpr_proxy_notify_recv(int status, ompi_process_name_t* sender,
|
||||
for (trackptr = (mca_gpr_proxy_notify_request_tracker_t*)ompi_list_get_first(&mca_gpr_proxy_notify_request_tracker);
|
||||
trackptr != (mca_gpr_proxy_notify_request_tracker_t*)ompi_list_get_end(&mca_gpr_proxy_notify_request_tracker) && !found;
|
||||
trackptr = (mca_gpr_proxy_notify_request_tracker_t*)ompi_list_get_next(trackptr)) {
|
||||
ompi_output(0, "\tchecking trigger %d for segment %s\n", trackptr->local_idtag,
|
||||
trackptr->segment);
|
||||
if (trackptr->local_idtag == id_tag) {
|
||||
found = true;
|
||||
}
|
||||
|
@ -84,6 +84,12 @@ mca_gpr_proxy_subscribe(ompi_registry_mode_t mode,
|
||||
goto CLEANUP;
|
||||
}
|
||||
|
||||
if (mca_gpr_proxy_debug) {
|
||||
ompi_output(0, "[%d,%d,%d] gpr proxy subscribe: subscribing to segment %s local idtag %d",
|
||||
OMPI_NAME_ARGS(*ompi_rte_get_self()), segment, (int)idtag);
|
||||
}
|
||||
|
||||
|
||||
if (0 > mca_oob_send_packed(mca_gpr_my_replica, cmd, MCA_OOB_TAG_GPR, 0)) {
|
||||
goto CLEANUP;
|
||||
}
|
||||
|
@ -232,6 +232,12 @@ bool mca_gpr_replica_process_triggers(mca_gpr_replica_segment_t *seg,
|
||||
cb->user_tag = NULL;
|
||||
cb->message = message;
|
||||
cb->remote_idtag = trackptr->remote_idtag;
|
||||
if (mca_gpr_replica_debug) {
|
||||
ompi_output(0, "[%d,%d,%d] process_trig: queueing message for [%d,%d,%d] with idtag %d using remoteid %d\n",
|
||||
OMPI_NAME_ARGS(*ompi_rte_get_self()), OMPI_NAME_ARGS(*(cb->requestor)),
|
||||
(int)cb->remote_idtag, (int)trackptr->remote_idtag);
|
||||
}
|
||||
|
||||
}
|
||||
ompi_list_append(&mca_gpr_replica_callbacks, &cb->item);
|
||||
|
||||
@ -316,8 +322,7 @@ mca_gpr_replica_enter_notify_request(mca_gpr_replica_segment_t *seg,
|
||||
trackptr->segptr = seg;
|
||||
trackptr->action = action;
|
||||
trackptr->requestor = ompi_name_server.copy_process_name(requestor);
|
||||
trackptr->local_idtag = idtag;
|
||||
trackptr->remote_idtag = OMPI_REGISTRY_NOTIFY_ID_MAX;
|
||||
trackptr->remote_idtag = idtag;
|
||||
trackptr->callback = cb_func;
|
||||
trackptr->user_tag = user_tag;
|
||||
if (ompi_list_is_empty(&mca_gpr_replica_free_notify_id_tags)) {
|
||||
|
@ -926,6 +926,11 @@ static ompi_registry_notify_id_t mca_gpr_replica_recv_subscribe_cmd(ompi_process
|
||||
|
||||
if (NULL != sender) { /* remote sender */
|
||||
|
||||
if (mca_gpr_replica_debug) {
|
||||
ompi_output(0, "[%d,%d,%d] subscribe created for remote sender [%d,%d,%d] on segment %s for idtag %d",
|
||||
OMPI_NAME_ARGS(*ompi_rte_get_self()), OMPI_NAME_ARGS(*sender), segment, id_tag);
|
||||
}
|
||||
|
||||
/* enter request on local notify tracking system */
|
||||
local_idtag1 = mca_gpr_replica_enter_notify_request(seg, action, sender, id_tag, NULL, NULL);
|
||||
|
||||
|
@ -323,8 +323,7 @@ void mca_gpr_replica_remote_notify(ompi_process_name_t *recipient, int recipient
|
||||
if (OMPI_SUCCESS != ompi_pack(msg, regval->object, regval->object_size, OMPI_BYTE)) {
|
||||
return;
|
||||
}
|
||||
/* TSW - should we add */
|
||||
/* OBJ_RELEASE(regval); */
|
||||
OBJ_RELEASE(regval);
|
||||
}
|
||||
}
|
||||
if (OMPI_SUCCESS != ompi_pack(msg, &message->num_tokens, 1, OMPI_INT32)) {
|
||||
|
Loading…
Reference in a new issue
Block a user