1
1

Add more detailed error output to the db_hash component and the nidmap code. Ensure the local nodename is included in the HNP's aliases.

This commit was SVN r27622.
Этот коммит содержится в:
Ralph Castain 2012-11-18 17:57:19 +00:00
родитель f2ec35536e
Коммит 43f883cb42
5 изменённых файлов: 26 добавлений и 4 удаления

Просмотреть файл

@ -123,7 +123,10 @@ enum {
ORTE_ERR_COMM_DISABLED = (ORTE_ERR_BASE - 40),
ORTE_ERR_FAILED_TO_MAP = (ORTE_ERR_BASE - 41),
ORTE_ERR_TAKE_NEXT_OPTION = (ORTE_ERR_BASE - 42),
ORTE_ERR_SENSOR_LIMIT_EXCEEDED = (ORTE_ERR_BASE - 43)
ORTE_ERR_SENSOR_LIMIT_EXCEEDED = (ORTE_ERR_BASE - 43),
ORTE_ERR_JOB_ENTRY_NOT_FOUND = (ORTE_ERR_BASE - 44),
ORTE_ERR_PROC_ENTRY_NOT_FOUND = (ORTE_ERR_BASE - 45),
ORTE_ERR_DATA_VALUE_NOT_FOUND = (ORTE_ERR_BASE - 46)
};
#define ORTE_ERR_MAX (ORTE_ERR_BASE - 100)

Просмотреть файл

@ -383,20 +383,20 @@ static int fetch(const orte_process_name_t *proc,
/* eventually, we will fetch this data - but for now, this
* is simply an error
*/
return ORTE_ERR_NOT_FOUND;
return ORTE_ERR_JOB_ENTRY_NOT_FOUND;
}
/* lookup the proc data object for this proc */
if (NULL == (proc_data = lookup_orte_proc(jtable->data, proc->vpid))) {
/* unrecoverable error */
return ORTE_ERR_OUT_OF_RESOURCE;
return ORTE_ERR_PROC_ENTRY_NOT_FOUND;
}
/* find the value */
if (NULL == (kv = lookup_keyval(proc_data, key))) {
/* again, we eventually will attempt to fetch the data - for
* now, just report it as an error */
return ORTE_ERR_NOT_FOUND;
return ORTE_ERR_DATA_VALUE_NOT_FOUND;
}
/* do the copy and check the type */

Просмотреть файл

@ -553,6 +553,8 @@ static int rte_init(void)
/* if we are to retain aliases, get ours */
if (orte_retain_aliases) {
opal_ifgetaliases(&node->alias);
/* add our own local name to it */
opal_argv_append_nosize(&node->alias, orte_process_info.nodename);
}
/* record that the daemon job is running */

Просмотреть файл

@ -184,6 +184,15 @@ int orte_err2str(int errnum, const char **errmsg)
case ORTE_ERR_SENSOR_LIMIT_EXCEEDED:
retval = "Sensor limit exceeded";
break;
case ORTE_ERR_JOB_ENTRY_NOT_FOUND:
retval = "Job entry not found";
break;
case ORTE_ERR_PROC_ENTRY_NOT_FOUND:
retval = "Proc entry not found";
break;
case ORTE_ERR_DATA_VALUE_NOT_FOUND:
retval = "Data not found";
break;
default:
if (orte_report_silent_errors) {
retval = "Unknown error";

Просмотреть файл

@ -383,6 +383,10 @@ int orte_util_decode_nodemap(opal_byte_object_t *bo)
return rc;
}
/* now store a direct reference so we can quickly look up the daemon from a hostname */
opal_output_verbose(2, orte_nidmap_output,
"%s storing nodename %s for daemon %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
nodename, ORTE_VPID_PRINT(daemon.vpid));
if (ORTE_SUCCESS != (rc = orte_db.store(ORTE_NAME_WILDCARD, nodename, &daemon.vpid, ORTE_VPID))) {
ORTE_ERROR_LOG(rc);
return rc;
@ -421,6 +425,10 @@ int orte_util_decode_nodemap(opal_byte_object_t *bo)
return rc;
}
/* store a cross-reference to the daemon for this nodename */
opal_output_verbose(2, orte_nidmap_output,
"%s storing alias %s for daemon %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
alias, ORTE_VPID_PRINT(daemon.vpid));
if (ORTE_SUCCESS != (rc = orte_db.store(ORTE_NAME_WILDCARD, alias, &daemon.vpid, ORTE_VPID))) {
ORTE_ERROR_LOG(rc);
return rc;