Add further debug output to the dstore framework. When doing comm_spawn, we have to exchange any provided CPU bitmaps so that both sides compute the same locality; otherwise various MPI frameworks can misbehave badly.
This commit was SVN r31572.
Этот коммит содержится в:
родитель
a286977eff
Коммит
087b84b0ef
@ -194,9 +194,15 @@ int ompi_proc_set_locality(ompi_proc_t *proc)
|
|||||||
|
|
||||||
/* retrieve the binding for the other proc */
|
/* retrieve the binding for the other proc */
|
||||||
OBJ_CONSTRUCT(&myvals, opal_list_t);
|
OBJ_CONSTRUCT(&myvals, opal_list_t);
|
||||||
if (OMPI_SUCCESS != opal_dstore.fetch(opal_dstore_internal,
|
if (OMPI_SUCCESS != (ret = opal_dstore.fetch(opal_dstore_internal,
|
||||||
(opal_identifier_t*)&proc->proc_name,
|
(opal_identifier_t*)&proc->proc_name,
|
||||||
OPAL_DSTORE_CPUSET, &myvals)) {
|
OPAL_DSTORE_CPUSET, &myvals))) {
|
||||||
|
/* check the nonpeer data in case of comm_spawn */
|
||||||
|
ret = opal_dstore.fetch(opal_dstore_nonpeer,
|
||||||
|
(opal_identifier_t*)&proc->proc_name,
|
||||||
|
OPAL_DSTORE_CPUSET, &myvals);
|
||||||
|
}
|
||||||
|
if (OMPI_SUCCESS != ret) {
|
||||||
/* we don't know their cpuset, so nothing more we can say */
|
/* we don't know their cpuset, so nothing more we can say */
|
||||||
locality = OPAL_PROC_ON_NODE;
|
locality = OPAL_PROC_ON_NODE;
|
||||||
} else {
|
} else {
|
||||||
|
@ -88,6 +88,9 @@ int opal_dstore_base_store(int dstorehandle,
|
|||||||
return OPAL_ERR_NOT_FOUND;
|
return OPAL_ERR_NOT_FOUND;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
opal_output_verbose(1, opal_dstore_base_framework.framework_output,
|
||||||
|
"storing data in %s dstore", (NULL == hdl->name) ? "NULL" : hdl->name);
|
||||||
|
|
||||||
return hdl->module->store((struct opal_dstore_base_module_t*)hdl->module, id, kv);
|
return hdl->module->store((struct opal_dstore_base_module_t*)hdl->module, id, kv);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -102,6 +105,8 @@ void opal_dstore_base_commit(int dstorehandle,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (NULL != hdl->module->commit) {
|
if (NULL != hdl->module->commit) {
|
||||||
|
opal_output_verbose(1, opal_dstore_base_framework.framework_output,
|
||||||
|
"committing data in %s dstore", (NULL == hdl->name) ? "NULL" : hdl->name);
|
||||||
hdl->module->commit((struct opal_dstore_base_module_t*)hdl->module, id);
|
hdl->module->commit((struct opal_dstore_base_module_t*)hdl->module, id);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -119,6 +124,9 @@ int opal_dstore_base_fetch(int dstorehandle,
|
|||||||
return OPAL_ERR_NOT_FOUND;
|
return OPAL_ERR_NOT_FOUND;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
opal_output_verbose(1, opal_dstore_base_framework.framework_output,
|
||||||
|
"fetching data from %s dstore", (NULL == hdl->name) ? "NULL" : hdl->name);
|
||||||
|
|
||||||
return hdl->module->fetch((struct opal_dstore_base_module_t*)hdl->module, id, key, kvs);
|
return hdl->module->fetch((struct opal_dstore_base_module_t*)hdl->module, id, key, kvs);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -133,6 +141,9 @@ int opal_dstore_base_remove_data(int dstorehandle,
|
|||||||
return OPAL_ERR_NOT_FOUND;
|
return OPAL_ERR_NOT_FOUND;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
opal_output_verbose(1, opal_dstore_base_framework.framework_output,
|
||||||
|
"removing data from %s dstore", (NULL == hdl->name) ? "NULL" : hdl->name);
|
||||||
|
|
||||||
return hdl->module->remove((struct opal_dstore_base_module_t*)hdl->module, id, key);
|
return hdl->module->remove((struct opal_dstore_base_module_t*)hdl->module, id, key);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -179,6 +179,9 @@ static int fetch(struct opal_dstore_base_module_t *imod,
|
|||||||
OPAL_ERROR_LOG(rc);
|
OPAL_ERROR_LOG(rc);
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
|
OPAL_OUTPUT_VERBOSE((5, opal_dstore_base_framework.framework_output,
|
||||||
|
"dstore:hash:fetch: adding data for key %s on proc %" PRIu64 "",
|
||||||
|
(NULL == kv->key) ? "NULL" : kv->key, id));
|
||||||
/* add it to the output list */
|
/* add it to the output list */
|
||||||
opal_list_append(kvs, &knew->super);
|
opal_list_append(kvs, &knew->super);
|
||||||
}
|
}
|
||||||
|
@ -1151,6 +1151,13 @@ int orte_util_decode_pidmap(opal_byte_object_t *bo)
|
|||||||
OBJ_DESTRUCT(&kv);
|
OBJ_DESTRUCT(&kv);
|
||||||
goto cleanup;
|
goto cleanup;
|
||||||
}
|
}
|
||||||
|
/* also need a copy in nonpeer to support dynamic spawns */
|
||||||
|
if (ORTE_SUCCESS != (rc = opal_dstore.store(opal_dstore_nonpeer,
|
||||||
|
(opal_identifier_t*)&proc, &kv))) {
|
||||||
|
ORTE_ERROR_LOG(rc);
|
||||||
|
OBJ_DESTRUCT(&kv);
|
||||||
|
goto cleanup;
|
||||||
|
}
|
||||||
OBJ_DESTRUCT(&kv);
|
OBJ_DESTRUCT(&kv);
|
||||||
free(cpu_bitmap);
|
free(cpu_bitmap);
|
||||||
}
|
}
|
||||||
|
Загрузка…
Ссылка в новой задаче
Block a user