1
1

Add a bunch of debug output to help track down the problem, which eventually turned up another place where the comparison of signatures was performed incorrectly. Use the dss compare operation there to be consistent and safe.

This commit was SVN r32620.
Этот коммит содержится в:
Ralph Castain 2014-08-27 19:52:20 +00:00
родитель 5fb7c7d23b
Коммит 731a878ff3
5 изменённых файлов: 55 добавлений и 10 удалений

Просмотреть файл

@ -197,11 +197,7 @@ orte_grpcomm_coll_t* orte_grpcomm_base_get_tracker(orte_grpcomm_signature_t *sig
/* if only one is NULL, then we can't possibly match */ /* if only one is NULL, then we can't possibly match */
break; break;
} }
/* if the size doesn't match, then they can't be the same */ if (OPAL_EQUAL == opal_dss.compare(sig, coll->sig, ORTE_SIGNATURE)) {
if (sig->sz != coll->sig->sz) {
continue;
}
if (0 == memcmp(sig->signature, coll->sig->signature, coll->sig->sz)) {
OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_framework.framework_output, OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_framework.framework_output,
"%s grpcomm:base:returning existing collective", "%s grpcomm:base:returning existing collective",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
@ -217,9 +213,13 @@ orte_grpcomm_coll_t* orte_grpcomm_base_get_tracker(orte_grpcomm_signature_t *sig
return NULL; return NULL;
} }
OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_framework.framework_output, if (1 < opal_output_get_verbosity(orte_grpcomm_base_framework.framework_output)) {
"%s grpcomm:base: creating new coll", char *tmp=NULL;
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); (void)opal_dss.print(&tmp, NULL, sig, ORTE_SIGNATURE);
opal_output(0, "%s grpcomm:base: creating new coll for procs %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), tmp);
free(tmp);
}
coll = OBJ_NEW(orte_grpcomm_coll_t); coll = OBJ_NEW(orte_grpcomm_coll_t);
OBJ_RETAIN(sig); OBJ_RETAIN(sig);
coll->sig = sig; coll->sig = sig;

Просмотреть файл

@ -437,8 +437,8 @@ static void xcast_recv(int status, orte_process_name_t* sender,
nm = (orte_namelist_t*)item; nm = (orte_namelist_t*)item;
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output, OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output,
"%s grpcomm:direct:send_relay sending relay msg to %s", "%s grpcomm:direct:send_relay sending relay msg of %d bytes to %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (int)rly->bytes_used,
ORTE_NAME_PRINT(&nm->name))); ORTE_NAME_PRINT(&nm->name)));
OBJ_RETAIN(rly); OBJ_RETAIN(rly);
/* check the state of the recipient - no point /* check the state of the recipient - no point

Просмотреть файл

@ -676,6 +676,13 @@ static void pmix_server_release(int status,
pmix_server_peer_t *peer; pmix_server_peer_t *peer;
opal_buffer_t *reply; opal_buffer_t *reply;
if (2 < opal_output_get_verbosity(pmix_server_output)) {
char *tmp=NULL;
(void)opal_dss.print(&tmp, NULL, trk->sig, ORTE_SIGNATURE);
opal_output(0, "%s pmix_server release called on tracker %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), tmp);
free(tmp);
}
opal_output_verbose(2, pmix_server_output, opal_output_verbose(2, pmix_server_output,
"%s pmix:server:release coll release recvd", "%s pmix:server:release coll release recvd",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));

Просмотреть файл

@ -715,6 +715,14 @@ static void process_message(pmix_server_peer_t *peer)
goto reply_fence; goto reply_fence;
} }
} }
if (4 < opal_output_get_verbosity(pmix_server_output)) {
char *tmp=NULL;
(void)opal_dss.print(&tmp, NULL, sig, ORTE_SIGNATURE);
opal_output(0, "%s %s called with procs %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
(PMIX_FENCENB_CMD == cmd) ? "FENCE_NB" : "FENCE", tmp);
free(tmp);
}
/* get the URI for this process */ /* get the URI for this process */
cnt = 1; cnt = 1;
if (OPAL_SUCCESS != (rc = opal_dss.unpack(&xfer, &local_uri, &cnt, OPAL_STRING))) { if (OPAL_SUCCESS != (rc = opal_dss.unpack(&xfer, &local_uri, &cnt, OPAL_STRING))) {

Просмотреть файл

@ -895,6 +895,36 @@ int orte_dt_print_attr(char **output, char *prefix,
/*
 * Render a group-communication signature as a printable string.
 *
 * output - receives a newly allocated string; the caller must free() it
 * prefix - text placed at the start of the string; NULL means a single space
 * src    - the signature to print; a NULL pointer or a NULL signature array
 *          each yield a short placeholder string instead of a list of names
 * type   - data type tag (not consulted by this function)
 *
 * Returns ORTE_SUCCESS on every path.
 */
int orte_dt_print_sig(char **output, char *prefix, orte_grpcomm_signature_t *src, opal_data_type_t type)
{
    char *prefx;
    size_t i;
    char *tmp, *tmp2;

    /* deal with NULL prefix */
    if (NULL == prefix) {
        asprintf(&prefx, " ");
    } else {
        prefx = strdup(prefix);
    }

    /* if src is NULL, just print data type and return */
    if (NULL == src) {
        asprintf(output, "%sData type: ORTE_SIG\tValue: NULL pointer", prefx);
        free(prefx);
        return ORTE_SUCCESS;
    }

    /* a signature object without a name array has nothing to list */
    if (NULL == src->signature) {
        asprintf(output, "%sORTE_SIG\tValue: NULL", prefx);
        free(prefx);
        return ORTE_SUCCESS;
    }

    /* there must be at least one */
    asprintf(&tmp, "%sORTE_SIG\tValue: ", prefx);
    /* the prefix has been copied into tmp, so release it now - the
     * early-return paths above free it, and this path must as well
     * or every successful print leaks the prefix string */
    free(prefx);

    /* append each process name in turn, releasing the previous
     * partial string once the longer one has been built */
    for (i = 0; i < src->sz; i++) {
        asprintf(&tmp2, "%s%s", tmp, ORTE_NAME_PRINT(&src->signature[i]));
        free(tmp);
        tmp = tmp2;
    }

    /* hand ownership of the finished string to the caller */
    *output = tmp;
    return ORTE_SUCCESS;
}