Add some further debug output to the dstore framework. When doing comm_spawn, we have to exchange any provided cpu bitmaps to ensure that both sides compute the same locality; otherwise, various MPI frameworks can misbehave.
This commit was SVN r31572.
Этот коммит содержится в:
родитель
a286977eff
Коммит
087b84b0ef
@ -194,9 +194,15 @@ int ompi_proc_set_locality(ompi_proc_t *proc)
|
||||
|
||||
/* retrieve the binding for the other proc */
|
||||
OBJ_CONSTRUCT(&myvals, opal_list_t);
|
||||
if (OMPI_SUCCESS != opal_dstore.fetch(opal_dstore_internal,
|
||||
(opal_identifier_t*)&proc->proc_name,
|
||||
OPAL_DSTORE_CPUSET, &myvals)) {
|
||||
if (OMPI_SUCCESS != (ret = opal_dstore.fetch(opal_dstore_internal,
|
||||
(opal_identifier_t*)&proc->proc_name,
|
||||
OPAL_DSTORE_CPUSET, &myvals))) {
|
||||
/* check the nonpeer data in case of comm_spawn */
|
||||
ret = opal_dstore.fetch(opal_dstore_nonpeer,
|
||||
(opal_identifier_t*)&proc->proc_name,
|
||||
OPAL_DSTORE_CPUSET, &myvals);
|
||||
}
|
||||
if (OMPI_SUCCESS != ret) {
|
||||
/* we don't know their cpuset, so nothing more we can say */
|
||||
locality = OPAL_PROC_ON_NODE;
|
||||
} else {
|
||||
|
@ -88,6 +88,9 @@ int opal_dstore_base_store(int dstorehandle,
|
||||
return OPAL_ERR_NOT_FOUND;
|
||||
}
|
||||
|
||||
opal_output_verbose(1, opal_dstore_base_framework.framework_output,
|
||||
"storing data in %s dstore", (NULL == hdl->name) ? "NULL" : hdl->name);
|
||||
|
||||
return hdl->module->store((struct opal_dstore_base_module_t*)hdl->module, id, kv);
|
||||
}
|
||||
|
||||
@ -102,6 +105,8 @@ void opal_dstore_base_commit(int dstorehandle,
|
||||
}
|
||||
|
||||
if (NULL != hdl->module->commit) {
|
||||
opal_output_verbose(1, opal_dstore_base_framework.framework_output,
|
||||
"committing data in %s dstore", (NULL == hdl->name) ? "NULL" : hdl->name);
|
||||
hdl->module->commit((struct opal_dstore_base_module_t*)hdl->module, id);
|
||||
}
|
||||
}
|
||||
@ -119,6 +124,9 @@ int opal_dstore_base_fetch(int dstorehandle,
|
||||
return OPAL_ERR_NOT_FOUND;
|
||||
}
|
||||
|
||||
opal_output_verbose(1, opal_dstore_base_framework.framework_output,
|
||||
"fetching data from %s dstore", (NULL == hdl->name) ? "NULL" : hdl->name);
|
||||
|
||||
return hdl->module->fetch((struct opal_dstore_base_module_t*)hdl->module, id, key, kvs);
|
||||
}
|
||||
|
||||
@ -133,6 +141,9 @@ int opal_dstore_base_remove_data(int dstorehandle,
|
||||
return OPAL_ERR_NOT_FOUND;
|
||||
}
|
||||
|
||||
opal_output_verbose(1, opal_dstore_base_framework.framework_output,
|
||||
"removing data from %s dstore", (NULL == hdl->name) ? "NULL" : hdl->name);
|
||||
|
||||
return hdl->module->remove((struct opal_dstore_base_module_t*)hdl->module, id, key);
|
||||
}
|
||||
|
||||
|
@ -179,6 +179,9 @@ static int fetch(struct opal_dstore_base_module_t *imod,
|
||||
OPAL_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
OPAL_OUTPUT_VERBOSE((5, opal_dstore_base_framework.framework_output,
|
||||
"dstore:hash:fetch: adding data for key %s on proc %" PRIu64 "",
|
||||
(NULL == kv->key) ? "NULL" : kv->key, id));
|
||||
/* add it to the output list */
|
||||
opal_list_append(kvs, &knew->super);
|
||||
}
|
||||
|
@ -1151,6 +1151,13 @@ int orte_util_decode_pidmap(opal_byte_object_t *bo)
|
||||
OBJ_DESTRUCT(&kv);
|
||||
goto cleanup;
|
||||
}
|
||||
/* also need a copy in nonpeer to support dynamic spawns */
|
||||
if (ORTE_SUCCESS != (rc = opal_dstore.store(opal_dstore_nonpeer,
|
||||
(opal_identifier_t*)&proc, &kv))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_DESTRUCT(&kv);
|
||||
goto cleanup;
|
||||
}
|
||||
OBJ_DESTRUCT(&kv);
|
||||
free(cpu_bitmap);
|
||||
}
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user