Add a bunch of debug output to help track down the problem, which eventually turned up another place where signatures were being compared incorrectly - use the dss compare operation there to be consistent and safe
This commit was SVN r32620.
Этот коммит содержится в:
родитель
5fb7c7d23b
Коммит
731a878ff3
@ -197,11 +197,7 @@ orte_grpcomm_coll_t* orte_grpcomm_base_get_tracker(orte_grpcomm_signature_t *sig
|
||||
/* if only one is NULL, then we can't possibly match */
|
||||
break;
|
||||
}
|
||||
/* if the size doesn't match, then they can't be the same */
|
||||
if (sig->sz != coll->sig->sz) {
|
||||
continue;
|
||||
}
|
||||
if (0 == memcmp(sig->signature, coll->sig->signature, coll->sig->sz)) {
|
||||
if (OPAL_EQUAL == opal_dss.compare(sig, coll->sig, ORTE_SIGNATURE)) {
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_framework.framework_output,
|
||||
"%s grpcomm:base:returning existing collective",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
@ -217,9 +213,13 @@ orte_grpcomm_coll_t* orte_grpcomm_base_get_tracker(orte_grpcomm_signature_t *sig
|
||||
|
||||
return NULL;
|
||||
}
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_framework.framework_output,
|
||||
"%s grpcomm:base: creating new coll",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
if (1 < opal_output_get_verbosity(orte_grpcomm_base_framework.framework_output)) {
|
||||
char *tmp=NULL;
|
||||
(void)opal_dss.print(&tmp, NULL, sig, ORTE_SIGNATURE);
|
||||
opal_output(0, "%s grpcomm:base: creating new coll for procs %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), tmp);
|
||||
free(tmp);
|
||||
}
|
||||
coll = OBJ_NEW(orte_grpcomm_coll_t);
|
||||
OBJ_RETAIN(sig);
|
||||
coll->sig = sig;
|
||||
|
@ -437,8 +437,8 @@ static void xcast_recv(int status, orte_process_name_t* sender,
|
||||
nm = (orte_namelist_t*)item;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output,
|
||||
"%s grpcomm:direct:send_relay sending relay msg to %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
"%s grpcomm:direct:send_relay sending relay msg of %d bytes to %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (int)rly->bytes_used,
|
||||
ORTE_NAME_PRINT(&nm->name)));
|
||||
OBJ_RETAIN(rly);
|
||||
/* check the state of the recipient - no point
|
||||
|
@ -676,6 +676,13 @@ static void pmix_server_release(int status,
|
||||
pmix_server_peer_t *peer;
|
||||
opal_buffer_t *reply;
|
||||
|
||||
if (2 < opal_output_get_verbosity(pmix_server_output)) {
|
||||
char *tmp=NULL;
|
||||
(void)opal_dss.print(&tmp, NULL, trk->sig, ORTE_SIGNATURE);
|
||||
opal_output(0, "%s pmix_server release called on tracker %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), tmp);
|
||||
free(tmp);
|
||||
}
|
||||
opal_output_verbose(2, pmix_server_output,
|
||||
"%s pmix:server:release coll release recvd",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
|
@ -715,6 +715,14 @@ static void process_message(pmix_server_peer_t *peer)
|
||||
goto reply_fence;
|
||||
}
|
||||
}
|
||||
if (4 < opal_output_get_verbosity(pmix_server_output)) {
|
||||
char *tmp=NULL;
|
||||
(void)opal_dss.print(&tmp, NULL, sig, ORTE_SIGNATURE);
|
||||
opal_output(0, "%s %s called with procs %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
(PMIX_FENCENB_CMD == cmd) ? "FENCE_NB" : "FENCE", tmp);
|
||||
free(tmp);
|
||||
}
|
||||
/* get the URI for this process */
|
||||
cnt = 1;
|
||||
if (OPAL_SUCCESS != (rc = opal_dss.unpack(&xfer, &local_uri, &cnt, OPAL_STRING))) {
|
||||
|
@ -895,6 +895,36 @@ int orte_dt_print_attr(char **output, char *prefix,
|
||||
|
||||
/**
 * Render a group-communication signature as a printable string.
 *
 * @param output  Receives a string allocated by asprintf(); the caller
 *                owns it and must free() it.
 * @param prefix  Text placed at the start of the output. A NULL prefix is
 *                rendered as a single space.
 * @param src     Signature to print. A NULL pointer, or a signature whose
 *                name array is NULL, produces a short placeholder string.
 * @param type    Not used by this function.
 *
 * @return OPAL_SUCCESS when src is NULL, ORTE_SUCCESS otherwise.
 *
 * NOTE(review): the asprintf() results are not checked, matching the
 * original code; on allocation failure the contents of *output are
 * whatever asprintf() leaves behind.
 */
int orte_dt_print_sig(char **output, char *prefix, orte_grpcomm_signature_t *src, opal_data_type_t type)
{
    /* The prefix is only ever read, so there is no need to duplicate it.
     * Using it in place removes an allocation that was never released on
     * the main (non-NULL signature) path. */
    const char *prefx = (NULL == prefix) ? " " : prefix;
    size_t i;
    char *tmp, *tmp2;

    /* if src is NULL, just print data type and return */
    if (NULL == src) {
        asprintf(output, "%sData type: ORTE_SIG\tValue: NULL pointer", prefx);
        return OPAL_SUCCESS;
    }

    /* a signature without a name array has nothing to list */
    if (NULL == src->signature) {
        asprintf(output, "%sORTE_SIG\tValue: NULL", prefx);
        return ORTE_SUCCESS;
    }

    /* there must be at least one */
    asprintf(&tmp, "%sORTE_SIG\tValue: ", prefx);

    /* append each process name in turn, releasing the previous
     * intermediate string once the longer one has been built */
    for (i = 0; i < src->sz; i++) {
        asprintf(&tmp2, "%s%s", tmp, ORTE_NAME_PRINT(&src->signature[i]));
        free(tmp);
        tmp = tmp2;
    }

    *output = tmp;
    return ORTE_SUCCESS;
}
|
||||
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user