Revert r29917 and replace it with a fix that resolves the thread deadlock while retaining the desired debug info. In an earlier commit, we had changed the modex accordingly:
* automatically retrieve the hostname (and all RTE info) for all procs during MPI_Init if nprocs < cutoff

* if nprocs > cutoff, retrieve the hostname (and all RTE info) for a proc upon the first call to modex_recv for that proc. This provides the hostname for debugging purposes: since we only report errors on messages, we must already have called modex_recv to get the endpoint info

* BTLs are not to call modex_recv until they need the endpoint info for the first message - i.e., not during add_procs - so we don't call it for every process in the job, but only for those with whom we communicate

My understanding is that only some BTLs have been modified to meet that third requirement, but those include the Cray ones, where jobs are big enough that launch times were becoming an issue. Other BTLs would hopefully be modified as time went on and interest in using them at scale arose. Meantime, those BTLs would call modex_recv on every proc, and we would therefore be no worse off than with the prior behavior.

This commit revises the MPI-RTE interface to pass the ompi_proc_t instead of the ompi_process_name_t for the proc so that the hostname can be easily inserted. I have advised the ORNL folks of the change.

cmr=v1.7.4:reviewer=jsquyres:subject=Fix thread deadlock

This commit was SVN r29931.

The following SVN revision numbers were found above:
  r29917 --> open-mpi/ompi@1a972e2c9d
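To make the third bullet concrete, here is a minimal, hypothetical sketch (not part of this commit; the "foo" BTL name, the helper function, and its endpoint handling are invented for illustration) of how a BTL that defers modex_recv to first-message time would use the existing ompi_modex_recv() API, and how the peer's hostname only becomes available for error reporting after that first fetch:

/* Hypothetical sketch only: the "foo" names are illustrative.  The APIs
 * referenced here (ompi_modex_recv, ompi_proc_t, opal_output) exist in OMPI;
 * the surrounding BTL structure is assumed. */
#include <stdlib.h>

#include "opal/mca/mca.h"
#include "opal/util/output.h"
#include "ompi/constants.h"
#include "ompi/proc/proc.h"
#include "ompi/runtime/ompi_module_exchange.h"

/* Called lazily from the BTL's first-send path instead of from add_procs, so
 * the modex (and hence RTE info such as the hostname) is only pulled for
 * peers we actually communicate with. */
static int foo_btl_setup_endpoint(const mca_base_component_t *btl_component,
                                  ompi_proc_t *proc)
{
    void *endpoint_info = NULL;
    size_t size = 0;
    int rc;

    /* First modex_recv for this peer: fetches the endpoint blob and, as a
     * side effect of the revised ompi_rte_db_fetch, fills in
     * proc->proc_hostname if it was left NULL at MPI_Init time. */
    rc = ompi_modex_recv(btl_component, proc, &endpoint_info, &size);
    if (OMPI_SUCCESS != rc) {
        /* the hostname (when known) can now be used in error reports */
        opal_output(0, "foo BTL: no endpoint info for peer on host %s",
                    (NULL == proc->proc_hostname) ? "unknown" : proc->proc_hostname);
        return rc;
    }

    /* ... unpack endpoint_info into the BTL's endpoint structure ... */
    free(endpoint_info);
    return OMPI_SUCCESS;
}

In this pattern, small jobs (nprocs < cutoff) never hit the lazy path because MPI_Init already populated proc_hostname, while large jobs only pay the per-peer modex cost on first contact.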
Parent: f13a37637f
Commit: 0995a6f3b9
@@ -21,6 +21,7 @@
 #include "ompi/constants.h"
 #include "ompi/info/info.h"
 
+struct ompi_proc_t;
 
 #include "orte/types.h"
 #include "orte/mca/errmgr/errmgr.h"
@@ -89,13 +90,13 @@ OMPI_DECLSPEC void ompi_rte_wait_for_debugger(void);
 /* Database operations */
 OMPI_DECLSPEC int ompi_rte_db_store(const ompi_process_name_t *nm, const char* key,
                                     const void *data, opal_data_type_t type);
-OMPI_DECLSPEC int ompi_rte_db_fetch(const ompi_process_name_t *nm,
+OMPI_DECLSPEC int ompi_rte_db_fetch(const struct ompi_proc_t *proc,
                                     const char *key,
                                     void **data, opal_data_type_t type);
-OMPI_DECLSPEC int ompi_rte_db_fetch_pointer(const ompi_process_name_t *nm,
+OMPI_DECLSPEC int ompi_rte_db_fetch_pointer(const struct ompi_proc_t *proc,
                                             const char *key,
                                             void **data, opal_data_type_t type);
-OMPI_DECLSPEC int ompi_rte_db_fetch_multiple(const ompi_process_name_t *nm,
+OMPI_DECLSPEC int ompi_rte_db_fetch_multiple(const struct ompi_proc_t *proc,
                                              const char *key,
                                              opal_list_t *kvs);
 OMPI_DECLSPEC int ompi_rte_db_remove(const ompi_process_name_t *nm,
@@ -143,44 +143,53 @@ int ompi_rte_db_store(const orte_process_name_t *nm, const char* key,
     return opal_db.store((opal_identifier_t*)nm, OPAL_SCOPE_GLOBAL, key, data, type);
 }
 
-int ompi_rte_db_fetch(const orte_process_name_t *nm,
+int ompi_rte_db_fetch(const struct ompi_proc_t *proc,
                       const char *key,
                       void **data, opal_data_type_t type)
 {
+    ompi_proc_t *proct;
     int rc;
 
-    if (OPAL_SUCCESS != (rc = opal_db.fetch((opal_identifier_t*)nm, key, data, type))) {
+    if (OPAL_SUCCESS != (rc = opal_db.fetch((opal_identifier_t*)(&proc->proc_name), key, data, type))) {
         return rc;
     }
+    /* update the hostname upon first call to modex-recv for this proc */
+    if (NULL == proc->proc_hostname) {
+        opal_db.fetch_pointer((opal_identifier_t*)(&proc->proc_name), ORTE_DB_HOSTNAME, (void**)&proc->proc_hostname, OPAL_STRING);
+    }
     return OMPI_SUCCESS;
 }
 
-int ompi_rte_db_fetch_pointer(const orte_process_name_t *nm,
+int ompi_rte_db_fetch_pointer(const struct ompi_proc_t *proc,
                               const char *key,
                               void **data, opal_data_type_t type)
 {
+    ompi_proc_t *proct;
     int rc;
 
-    if (OPAL_SUCCESS != (rc = opal_db.fetch_pointer((opal_identifier_t*)nm, key, data, type))) {
+    if (OPAL_SUCCESS != (rc = opal_db.fetch_pointer((opal_identifier_t*)(&proc->proc_name), key, data, type))) {
         return rc;
     }
+    /* update the hostname upon first call to modex-recv for this proc */
+    if (NULL == proc->proc_hostname) {
+        opal_db.fetch_pointer((opal_identifier_t*)(&proc->proc_name), ORTE_DB_HOSTNAME, (void**)&proc->proc_hostname, OPAL_STRING);
+    }
     return OMPI_SUCCESS;
 }
 
-int ompi_rte_db_fetch_multiple(const orte_process_name_t *nm,
+int ompi_rte_db_fetch_multiple(const struct ompi_proc_t *proc,
                                const char *key,
                                opal_list_t *kvs)
 {
+    ompi_proc_t *proct;
     int rc;
 
     /* MPI processes are only concerned with shared info */
-    if (OPAL_SUCCESS != (rc = opal_db.fetch_multiple((opal_identifier_t*)nm,
+    if (OPAL_SUCCESS != (rc = opal_db.fetch_multiple((opal_identifier_t*)(&proc->proc_name),
                                                      OPAL_SCOPE_GLOBAL, key, kvs))) {
         return rc;
     }
+    /* update the hostname upon first call to modex-recv for this proc */
+    if (NULL == proc->proc_hostname) {
+        opal_db.fetch_pointer((opal_identifier_t*)(&proc->proc_name), ORTE_DB_HOSTNAME, (void**)&proc->proc_hostname, OPAL_STRING);
+    }
     return OMPI_SUCCESS;
 }
@@ -234,14 +234,23 @@ int ompi_proc_complete_init(void)
         }
 
         if (ompi_process_info.num_procs < ompi_hostname_cutoff) {
-            /* retrieve the hostname */
+            /* IF the number of procs falls below the specified cutoff,
+             * then we assume the job is small enough that retrieving
+             * the hostname (which will typically cause retrieval of
+             * ALL modex info for this proc) will have no appreciable
+             * impact on launch scaling
+             */
             ret = ompi_modex_recv_string_pointer(OMPI_DB_HOSTNAME, proc, (void**)&(proc->proc_hostname), OPAL_STRING);
             if (OMPI_SUCCESS != ret) {
                 break;
             }
         } else {
             /* just set the hostname to NULL for now - we'll fill it in
-             * as modex_recv's are called for procs we will talk to
+             * as modex_recv's are called for procs we will talk to, thus
+             * avoiding retrieval of ALL modex info for this proc until
+             * required. Transports that delay calling modex_recv until
+             * first message will therefore scale better than those that
+             * call modex_recv on all procs during init.
              */
             proc->proc_hostname = NULL;
         }
@@ -470,14 +479,23 @@ int ompi_proc_refresh(void) {
             break;
         }
         if (ompi_process_info.num_procs < ompi_hostname_cutoff) {
-            /* retrieve the hostname */
+            /* IF the number of procs falls below the specified cutoff,
+             * then we assume the job is small enough that retrieving
+             * the hostname (which will typically cause retrieval of
+             * ALL modex info for this proc) will have no appreciable
+             * impact on launch scaling
+             */
            ret = ompi_modex_recv_string_pointer(OMPI_DB_HOSTNAME, proc, (void**)&(proc->proc_hostname), OPAL_STRING);
            if (OMPI_SUCCESS != ret) {
                break;
            }
        } else {
            /* just set the hostname to NULL for now - we'll fill it in
-            * as modex_recv's are called for procs we will talk to
+            * as modex_recv's are called for procs we will talk to, thus
+            * avoiding retrieval of ALL modex info for this proc until
+            * required. Transports that delay calling modex_recv until
+            * first message will therefore scale better than those that
+            * call modex_recv on all procs during init.
             */
            proc->proc_hostname = NULL;
        }
@@ -71,7 +71,7 @@ ompi_modex_recv(const mca_base_component_t *component,
     }
 
     /* the fetch API returns a pointer to the data */
-    rc = ompi_rte_db_fetch(&proc->proc_name, key, (void**)&boptr, OPAL_BYTE_OBJECT);
+    rc = ompi_rte_db_fetch(proc, key, (void**)&boptr, OPAL_BYTE_OBJECT);
 
     if (OMPI_SUCCESS == rc) {
         /* xfer the data - it was allocated in the call */
@@ -102,7 +102,7 @@ int ompi_modex_recv_pointer(const mca_base_component_t *component,
     }
 
     /* the fetch_poointer API returns a pointer to the data */
-    rc = ompi_rte_db_fetch_pointer(&proc->proc_name, name, buffer, type);
+    rc = ompi_rte_db_fetch_pointer(proc, name, buffer, type);
     free(name);
 
     return rc;
@@ -138,7 +138,7 @@ ompi_modex_recv_string(const char* key,
     *size = 0;
 
     /* the fetch API returns a copy of the data */
-    rc = ompi_rte_db_fetch(&source_proc->proc_name, key, (void**)&boptr, OPAL_BYTE_OBJECT);
+    rc = ompi_rte_db_fetch(source_proc, key, (void**)&boptr, OPAL_BYTE_OBJECT);
 
     if (OMPI_SUCCESS == rc) {
         /* xfer the data for local use */
@@ -163,7 +163,7 @@ int ompi_modex_recv_string_pointer(const char* key,
     *buffer = NULL;
 
     /* the fetch_pointer API returns a pointer to the data */
-    rc = ompi_rte_db_fetch_pointer(&source_proc->proc_name, key, (void**)buffer, type);
+    rc = ompi_rte_db_fetch_pointer(source_proc, key, (void**)buffer, type);
 
     return rc;
 }
@@ -188,7 +188,7 @@ int ompi_modex_recv_key_value(const char* key,
     int rc;
 
     /* the fetch API returns the data */
-    rc = ompi_rte_db_fetch(&source_proc->proc_name, key, (void**)value, type);
+    rc = ompi_rte_db_fetch(source_proc, key, (void**)value, type);
 
     return rc;
 }
|
Загрузка…
x
Ссылка в новой задаче
Block a user