1
1

Add further debug output to the dstore framework. When doing comm_spawn, we have to exchange any provided cpu bitmaps to ensure both sides compute the same locality; otherwise, various MPI frameworks can misbehave.

This commit was SVN r31572.
Этот коммит содержится в:
Ralph Castain 2014-04-30 19:29:00 +00:00
родитель a286977eff
Коммит 087b84b0ef
4 изменённых файла: 30 добавлений и 3 удаления

Просмотреть файл

@ -194,9 +194,15 @@ int ompi_proc_set_locality(ompi_proc_t *proc)
/* retrieve the binding for the other proc */ /* retrieve the binding for the other proc */
OBJ_CONSTRUCT(&myvals, opal_list_t); OBJ_CONSTRUCT(&myvals, opal_list_t);
if (OMPI_SUCCESS != opal_dstore.fetch(opal_dstore_internal, if (OMPI_SUCCESS != (ret = opal_dstore.fetch(opal_dstore_internal,
(opal_identifier_t*)&proc->proc_name, (opal_identifier_t*)&proc->proc_name,
OPAL_DSTORE_CPUSET, &myvals)) { OPAL_DSTORE_CPUSET, &myvals))) {
/* check the nonpeer data in case of comm_spawn */
ret = opal_dstore.fetch(opal_dstore_nonpeer,
(opal_identifier_t*)&proc->proc_name,
OPAL_DSTORE_CPUSET, &myvals);
}
if (OMPI_SUCCESS != ret) {
/* we don't know their cpuset, so nothing more we can say */ /* we don't know their cpuset, so nothing more we can say */
locality = OPAL_PROC_ON_NODE; locality = OPAL_PROC_ON_NODE;
} else { } else {

Просмотреть файл

@ -88,6 +88,9 @@ int opal_dstore_base_store(int dstorehandle,
return OPAL_ERR_NOT_FOUND; return OPAL_ERR_NOT_FOUND;
} }
opal_output_verbose(1, opal_dstore_base_framework.framework_output,
"storing data in %s dstore", (NULL == hdl->name) ? "NULL" : hdl->name);
return hdl->module->store((struct opal_dstore_base_module_t*)hdl->module, id, kv); return hdl->module->store((struct opal_dstore_base_module_t*)hdl->module, id, kv);
} }
@ -102,6 +105,8 @@ void opal_dstore_base_commit(int dstorehandle,
} }
if (NULL != hdl->module->commit) { if (NULL != hdl->module->commit) {
opal_output_verbose(1, opal_dstore_base_framework.framework_output,
"committing data in %s dstore", (NULL == hdl->name) ? "NULL" : hdl->name);
hdl->module->commit((struct opal_dstore_base_module_t*)hdl->module, id); hdl->module->commit((struct opal_dstore_base_module_t*)hdl->module, id);
} }
} }
@ -119,6 +124,9 @@ int opal_dstore_base_fetch(int dstorehandle,
return OPAL_ERR_NOT_FOUND; return OPAL_ERR_NOT_FOUND;
} }
opal_output_verbose(1, opal_dstore_base_framework.framework_output,
"fetching data from %s dstore", (NULL == hdl->name) ? "NULL" : hdl->name);
return hdl->module->fetch((struct opal_dstore_base_module_t*)hdl->module, id, key, kvs); return hdl->module->fetch((struct opal_dstore_base_module_t*)hdl->module, id, key, kvs);
} }
@ -133,6 +141,9 @@ int opal_dstore_base_remove_data(int dstorehandle,
return OPAL_ERR_NOT_FOUND; return OPAL_ERR_NOT_FOUND;
} }
opal_output_verbose(1, opal_dstore_base_framework.framework_output,
"removing data from %s dstore", (NULL == hdl->name) ? "NULL" : hdl->name);
return hdl->module->remove((struct opal_dstore_base_module_t*)hdl->module, id, key); return hdl->module->remove((struct opal_dstore_base_module_t*)hdl->module, id, key);
} }

Просмотреть файл

@ -179,6 +179,9 @@ static int fetch(struct opal_dstore_base_module_t *imod,
OPAL_ERROR_LOG(rc); OPAL_ERROR_LOG(rc);
return rc; return rc;
} }
OPAL_OUTPUT_VERBOSE((5, opal_dstore_base_framework.framework_output,
"dstore:hash:fetch: adding data for key %s on proc %" PRIu64 "",
(NULL == kv->key) ? "NULL" : kv->key, id));
/* add it to the output list */ /* add it to the output list */
opal_list_append(kvs, &knew->super); opal_list_append(kvs, &knew->super);
} }

Просмотреть файл

@ -1151,6 +1151,13 @@ int orte_util_decode_pidmap(opal_byte_object_t *bo)
OBJ_DESTRUCT(&kv); OBJ_DESTRUCT(&kv);
goto cleanup; goto cleanup;
} }
/* also need a copy in nonpeer to support dynamic spawns */
if (ORTE_SUCCESS != (rc = opal_dstore.store(opal_dstore_nonpeer,
(opal_identifier_t*)&proc, &kv))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&kv);
goto cleanup;
}
OBJ_DESTRUCT(&kv); OBJ_DESTRUCT(&kv);
free(cpu_bitmap); free(cpu_bitmap);
} }