1
1

Add some further debug output to the dstore framework. When doing comm_spawn, we have to exchange any provided CPU bitmaps to ensure both sides compute the same locality; otherwise various MPI frameworks can go bonkers.

This commit was SVN r31572.
Этот коммит содержится в:
Ralph Castain 2014-04-30 19:29:00 +00:00
родитель a286977eff
Коммит 087b84b0ef
4 изменённых файлов: 30 добавлений и 3 удалений

Просмотреть файл

@ -194,9 +194,15 @@ int ompi_proc_set_locality(ompi_proc_t *proc)
/* retrieve the binding for the other proc */
OBJ_CONSTRUCT(&myvals, opal_list_t);
if (OMPI_SUCCESS != opal_dstore.fetch(opal_dstore_internal,
(opal_identifier_t*)&proc->proc_name,
OPAL_DSTORE_CPUSET, &myvals)) {
if (OMPI_SUCCESS != (ret = opal_dstore.fetch(opal_dstore_internal,
(opal_identifier_t*)&proc->proc_name,
OPAL_DSTORE_CPUSET, &myvals))) {
/* check the nonpeer data in case of comm_spawn */
ret = opal_dstore.fetch(opal_dstore_nonpeer,
(opal_identifier_t*)&proc->proc_name,
OPAL_DSTORE_CPUSET, &myvals);
}
if (OMPI_SUCCESS != ret) {
/* we don't know their cpuset, so nothing more we can say */
locality = OPAL_PROC_ON_NODE;
} else {

Просмотреть файл

@ -88,6 +88,9 @@ int opal_dstore_base_store(int dstorehandle,
return OPAL_ERR_NOT_FOUND;
}
opal_output_verbose(1, opal_dstore_base_framework.framework_output,
"storing data in %s dstore", (NULL == hdl->name) ? "NULL" : hdl->name);
return hdl->module->store((struct opal_dstore_base_module_t*)hdl->module, id, kv);
}
@ -102,6 +105,8 @@ void opal_dstore_base_commit(int dstorehandle,
}
if (NULL != hdl->module->commit) {
opal_output_verbose(1, opal_dstore_base_framework.framework_output,
"committing data in %s dstore", (NULL == hdl->name) ? "NULL" : hdl->name);
hdl->module->commit((struct opal_dstore_base_module_t*)hdl->module, id);
}
}
@ -119,6 +124,9 @@ int opal_dstore_base_fetch(int dstorehandle,
return OPAL_ERR_NOT_FOUND;
}
opal_output_verbose(1, opal_dstore_base_framework.framework_output,
"fetching data from %s dstore", (NULL == hdl->name) ? "NULL" : hdl->name);
return hdl->module->fetch((struct opal_dstore_base_module_t*)hdl->module, id, key, kvs);
}
@ -133,6 +141,9 @@ int opal_dstore_base_remove_data(int dstorehandle,
return OPAL_ERR_NOT_FOUND;
}
opal_output_verbose(1, opal_dstore_base_framework.framework_output,
"removing data from %s dstore", (NULL == hdl->name) ? "NULL" : hdl->name);
return hdl->module->remove((struct opal_dstore_base_module_t*)hdl->module, id, key);
}

Просмотреть файл

@ -179,6 +179,9 @@ static int fetch(struct opal_dstore_base_module_t *imod,
OPAL_ERROR_LOG(rc);
return rc;
}
OPAL_OUTPUT_VERBOSE((5, opal_dstore_base_framework.framework_output,
"dstore:hash:fetch: adding data for key %s on proc %" PRIu64 "",
(NULL == kv->key) ? "NULL" : kv->key, id));
/* add it to the output list */
opal_list_append(kvs, &knew->super);
}

Просмотреть файл

@ -1151,6 +1151,13 @@ int orte_util_decode_pidmap(opal_byte_object_t *bo)
OBJ_DESTRUCT(&kv);
goto cleanup;
}
/* also need a copy in nonpeer to support dynamic spawns */
if (ORTE_SUCCESS != (rc = opal_dstore.store(opal_dstore_nonpeer,
(opal_identifier_t*)&proc, &kv))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&kv);
goto cleanup;
}
OBJ_DESTRUCT(&kv);
free(cpu_bitmap);
}