1
1

Allow the ESS to return the hostname and arch for a specified daemon instead of just for application procs. This uses the same API: the module just needs to detect that the specified proc is a daemon and look up its corresponding node in the nidmap.

This commit was SVN r18722.
Этот коммит содержится в:
Ralph Castain 2008-06-24 17:53:10 +00:00
родитель b118779c08
Коммит 578d1c15c6
10 изменённых файлов: 620 добавлений и 209 удалений

Просмотреть файл

@ -187,12 +187,68 @@ static bool proc_is_local(orte_process_name_t *proc)
}
/* The daemon's vpid does not necessarily correlate
 * to the node's index in the node array, since
 * some nodes may not have a daemon on them. Thus,
 * we have to search for the daemon in the array.
 * Fortunately, this is rarely done.
 *
 * @param vpid  vpid of the daemon to look for
 * @return index into the nidmap of the first entry whose
 *         daemon field equals vpid, or -1 if no entry matches
 */
static int32_t find_daemon_node(orte_vpid_t vpid)
{
    int32_t i;
    orte_nid_t **nids;

    nids = (orte_nid_t**)nidmap.addr;
    for (i=0; i < nidmap.size; i++) {
        /* the first empty slot ends the search */
        if (NULL == nids[i]) {
            break;
        }
        if (vpid == nids[i]->daemon) {
            return i;
        }
    }

    return -1;
}
static char* proc_get_hostname(orte_process_name_t *proc)
{
int32_t node;
orte_nid_t **nids;
node = pmap[proc->vpid].node;
if (ORTE_PROC_MY_DAEMON->jobid == proc->jobid) {
/* looking for the daemon's hostname */
node = find_daemon_node(proc->vpid);
if (0 > node) {
return NULL;
}
} else {
node = pmap[proc->vpid].node;
}
nids = (orte_nid_t**)nidmap.addr;
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
@ -209,7 +265,15 @@ static uint32_t proc_get_arch(orte_process_name_t *proc)
int32_t node;
orte_nid_t **nids;
node = pmap[proc->vpid].node;
if (ORTE_PROC_MY_DAEMON->jobid == proc->jobid) {
/* looking for the daemon's arch */
node = find_daemon_node(proc->vpid);
if (0 > node) {
return 0;
}
} else {
node = pmap[proc->vpid].node;
}
nids = (orte_nid_t**)nidmap.addr;
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,

Просмотреть файл

@ -20,3 +20,9 @@
#
PARAM_CONFIG_FILES="Makefile"
#
# Set the config priority so that, if we can build,
# only BPROC component(s) will build
PARAM_CONFIG_PRIORITY=30

Просмотреть файл

@ -16,33 +16,20 @@
* $HEADER$
*/
#ifndef ORTE_SDS_BPROC_H
#define ORTE_SDS_BPROC_H
#ifndef ORTE_ESS_BPROC_H
#define ORTE_ESS_BPROC_H
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
BEGIN_C_DECLS
/*
* Module open / close
*/
int orte_sds_bproc_component_open(void);
int orte_sds_bproc_component_close(void);
orte_sds_base_module_t* orte_sds_bproc_component_init(int *priority);
/*
* Module open / close
*/
int orte_ess_bproc_component_open(void);
int orte_ess_bproc_component_close(void);
int orte_ess_bproc_component_query(mca_base_module_t **module, int *priority);
/*
* Startup / Shutdown
*/
int orte_sds_bproc_finalize(void);
ORTE_MODULE_DECLSPEC extern orte_ess_base_component_t mca_ess_bproc_component;
/*
* Module functions
*/
int orte_sds_bproc_set_name(void);
END_C_DECLS
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
#endif /* ORTE_SDS_BPROC_H */
#endif /* ORTE_ESS_BPROC_H */

Просмотреть файл

@ -23,25 +23,26 @@
*/
#include "orte_config.h"
#include "orte/constants.h"
#include "orte/orte_constants.h"
#include "orte/mca/sds/sds.h"
#include "orte/mca/sds/bproc/sds_bproc.h"
#include "opal/mca/base/mca_base_param.h"
extern orte_sds_base_module_t orte_sds_bproc_module;
#include "orte/mca/ess/ess.h"
#include "ess_bproc.h"
extern orte_ess_base_module_t orte_ess_bproc_module;
/*
* Instantiate the public struct with all of our public information
* and pointers to our public functions in it
*/
orte_sds_base_component_t mca_sds_bproc_component = {
orte_ess_base_component_t mca_ess_bproc_component = {
/* First, the mca_component_t struct containing meta information
about the component itself */
{
/* Indicate that we are a sds v1.0.0 component (which also
/* Indicate that we are a ess v1.0.0 component (which also
implies a specific MCA version) */
ORTE_SDS_BASE_VERSION_1_0_0,
ORTE_ESS_BASE_VERSION_1_0_0,
/* Component name and version */
"bproc",
@ -50,47 +51,37 @@ orte_sds_base_component_t mca_sds_bproc_component = {
ORTE_RELEASE_VERSION,
/* Component open and close functions */
orte_sds_bproc_component_open,
orte_sds_bproc_component_close
orte_ess_bproc_component_open,
orte_ess_bproc_component_close,
orte_ess_bproc_component_query
},
/* Next the MCA v1.0.0 component meta data */
{
/* Whether the component is checkpointable or not */
true
},
/* Initialization / querying functions */
orte_sds_bproc_component_init
/* The component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
}
};
int
orte_sds_bproc_component_open(void)
int orte_ess_bproc_component_open(void)
{
return ORTE_SUCCESS;
}
orte_sds_base_module_t *
orte_sds_bproc_component_init(int *priority)
int orte_ess_bproc_component_query(mca_base_module_t **module, int *priority)
{
int id;
char *mode;
/* okay, not seed/singleton attempt another approach */
id = mca_base_param_register_string("ns", "nds", NULL, NULL, NULL);
mca_base_param_lookup_string(id, &mode);
if (NULL == mode || 0 != strcmp("bproc", mode)) { return NULL; }
/* if we can build, we are undoubtedly in a bproc environment,
* so set a reasonable priority
*/
*priority = 20;
return &orte_sds_bproc_module;
*module = (mca_base_module_t*)&orte_ess_bproc_module;
return ORTE_SUCCESS;
}
int
orte_sds_bproc_component_close(void)
int orte_ess_bproc_component_close(void)
{
return ORTE_SUCCESS;
}

Просмотреть файл

@ -18,161 +18,362 @@
*/
#include "orte_config.h"
#include "orte/constants.h"
#include <sys/bproc.h>
#include "orte/orte_constants.h"
#include "orte/util/sys_info.h"
#include "opal/util/output.h"
#include "opal/mca/base/mca_base_param.h"
#include "orte/mca/sds/sds.h"
#include "orte/mca/sds/base/base.h"
#include "orte/mca/sds/bproc/sds_bproc.h"
#include "orte/mca/ns/ns.h"
#include "orte/mca/ns/base/base.h"
#include "orte/mca/errmgr/base/base.h"
orte_sds_base_module_t orte_sds_bproc_module = {
orte_sds_base_basic_contact_universe,
orte_sds_bproc_set_name,
orte_sds_bproc_finalize,
#include "orte/mca/ess/ess.h"
#include "orte/mca/ess/base/base.h"
#include "orte/mca/errmgr/base/base.h"
#include "orte/util/show_help.h"
#include "orte/util/name_fns.h"
#include "ess_bproc.h"
static int bproc_set_name(void);
static int rte_init(char flags);
static int rte_finalize(void);
static bool proc_is_local(orte_process_name_t *proc);
static char* proc_get_hostname(orte_process_name_t *proc);
static uint32_t proc_get_arch(orte_process_name_t *proc);
static uint8_t proc_get_local_rank(orte_process_name_t *proc);
static uint8_t proc_get_node_rank(orte_process_name_t *proc);
static int update_arch(orte_process_name_t *proc, uint32_t arch);
/* The bproc ESS module: a table of the entry points this component
 * provides. The entries are positional, so their order must match
 * the member order of orte_ess_base_module_t (declared elsewhere).
 * All entries except orte_ess_base_app_abort are functions declared
 * static in this file.
 */
orte_ess_base_module_t orte_ess_bproc_module = {
rte_init,
rte_finalize,
orte_ess_base_app_abort,
proc_is_local,
proc_get_hostname,
proc_get_arch,
proc_get_local_rank,
proc_get_node_rank,
update_arch,
NULL /* no FT support for Bproc */
};
/* Module-private lookup state, populated in rte_init() by
 * orte_ess_base_build_nidmap() and released in rte_finalize().
 */
/* pointer array whose entries are accessed as orte_nid_t* in this file */
static opal_pointer_array_t nidmap;
/* per-process map, indexed by a proc's vpid */
static orte_pmap_t *pmap = NULL;
/* filled in by orte_ess_base_build_nidmap() */
static orte_vpid_t nprocs;
/**
 * Initialize the run-time environment for a process started under bproc.
 *
 * Runs the standard prolog, derives this process's name from the
 * environment, and then completes setup with the base procedure that
 * matches the process type (daemon, tool, or application). Application
 * processes additionally build the nidmap/pmap lookup tables from the
 * sync buffer.
 *
 * @param flags  not used by this function
 * @retval ORTE_SUCCESS on success
 * @retval error code of the first step that failed; a help message
 *         naming that step is shown before returning
 */
static int rte_init(char flags)
{
    int ret;
    char *error = NULL;

    /* run the prolog */
    if (ORTE_SUCCESS != (ret = orte_ess_base_std_prolog())) {
        error = "orte_ess_base_std_prolog";
        goto error;
    }

    /* Start by getting a unique name from the enviro. Nothing
     * below can work without a valid name, so do not continue
     * if it could not be determined.
     */
    if (ORTE_SUCCESS != (ret = bproc_set_name())) {
        ORTE_ERROR_LOG(ret);
        error = "bproc_set_name";
        goto error;
    }

    /* if I am a daemon, complete my setup using the
     * default procedure
     */
    if (orte_process_info.daemon) {
        if (ORTE_SUCCESS != (ret = orte_ess_base_orted_setup())) {
            ORTE_ERROR_LOG(ret);
            error = "orte_ess_base_orted_setup";
            goto error;
        }
    } else if (orte_process_info.tool) {
        /* otherwise, if I am a tool proc, use that procedure */
        if (ORTE_SUCCESS != (ret = orte_ess_base_tool_setup())) {
            ORTE_ERROR_LOG(ret);
            error = "orte_ess_base_tool_setup";
            goto error;
        }
    } else {
        /* otherwise, I must be an application process - use
         * the default procedure to finish my setup
         */
        if (ORTE_SUCCESS != (ret = orte_ess_base_app_setup())) {
            ORTE_ERROR_LOG(ret);
            error = "orte_ess_base_app_setup";
            goto error;
        }

        /* setup the nidmap arrays */
        OBJ_CONSTRUCT(&nidmap, opal_pointer_array_t);
        opal_pointer_array_init(&nidmap, 8, INT32_MAX, 8);

        /* if one was provided, build my nidmap */
        if (ORTE_SUCCESS != (ret = orte_ess_base_build_nidmap(orte_process_info.sync_buf,
                                                              &nidmap, &pmap, &nprocs))) {
            ORTE_ERROR_LOG(ret);
            error = "orte_ess_base_build_nidmap";
            goto error;
        }
    }

    return ORTE_SUCCESS;

error:
    /* tell the user which step failed and why */
    orte_show_help("help-orte-runtime.txt",
                   "orte_init:startup:internal-failure",
                   true, error, ORTE_ERROR_NAME(ret), ret);

    return ret;
}
/**
 * Tear down the run-time environment set up by rte_init().
 *
 * Daemons and tools simply run their base finalize procedure.
 * Application processes first release the node names held in the
 * nidmap, destruct the nidmap, free the pmap, and then run the
 * base application finalize procedure.
 *
 * @return the status returned by the base finalize procedure that
 *         was run (a failure is logged before being returned)
 */
static int rte_finalize(void)
{
    int status;
    int32_t idx;
    orte_nid_t **entries;

    /* daemon: default daemon teardown, nothing else to release */
    if (orte_process_info.daemon) {
        status = orte_ess_base_orted_finalize();
        if (ORTE_SUCCESS != status) {
            ORTE_ERROR_LOG(status);
        }
        return status;
    }

    /* tool: default tool teardown, nothing else to release */
    if (orte_process_info.tool) {
        status = orte_ess_base_tool_finalize();
        if (ORTE_SUCCESS != status) {
            ORTE_ERROR_LOG(status);
        }
        return status;
    }

    /* application process: release the node names, stopping at
     * the first empty slot of the nidmap
     */
    entries = (orte_nid_t**)nidmap.addr;
    for (idx = 0; idx < nidmap.size && NULL != entries[idx]; idx++) {
        if (NULL != entries[idx]->name) {
            free(entries[idx]->name);
            entries[idx]->name = NULL;
        }
    }
    OBJ_DESTRUCT(&nidmap);

    free(pmap);
    pmap = NULL;

    /* finish with the default application teardown */
    status = orte_ess_base_app_finalize();
    if (ORTE_SUCCESS != status) {
        ORTE_ERROR_LOG(status);
    }
    return status;
}
/**
 * Report whether a process is local to this process.
 *
 * A process counts as local when the node recorded for it in the
 * pmap equals the vpid of my own daemon.
 *
 * @param proc  name of the process to check
 * @return true if local, false otherwise
 */
static bool proc_is_local(orte_process_name_t *proc)
{
    bool same_node = (pmap[proc->vpid].node == (int32_t)ORTE_PROC_MY_DAEMON->vpid);

    if (!same_node) {
        OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
                             "%s ess:bproc: proc %s is REMOTE",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             ORTE_NAME_PRINT(proc)));
        return false;
    }

    OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
                         "%s ess:bproc: proc %s is LOCAL",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(proc)));
    return true;
}
/* A daemon's vpid cannot be used directly as an index into the
 * node array, because some nodes may not host a daemon. The
 * array therefore has to be scanned for the entry that records
 * the given daemon. This lookup is expected to be rare.
 *
 * Returns the nidmap index of the first entry whose daemon field
 * equals vpid, or -1 if the scan reaches an empty slot or the end
 * of the array without a match.
 */
static int32_t find_daemon_node(orte_vpid_t vpid)
{
    orte_nid_t **entries = (orte_nid_t**)nidmap.addr;
    int32_t idx = 0;

    while (idx < nidmap.size && NULL != entries[idx]) {
        if (entries[idx]->daemon == vpid) {
            return idx;
        }
        ++idx;
    }

    return -1;
}
/**
 * Return the name of the host on which a process resides.
 *
 * If the process belongs to the same job as my daemon, it is treated
 * as a daemon and its node is found by searching the nidmap;
 * otherwise the node is read from the pmap using the proc's vpid.
 *
 * @param proc  name of the process of interest
 * @return the name stored in the nidmap entry for that node (not a
 *         copy), or NULL if a daemon was requested but not found
 */
static char* proc_get_hostname(orte_process_name_t *proc)
{
    orte_nid_t **entries;
    int32_t idx;

    if (ORTE_PROC_MY_DAEMON->jobid != proc->jobid) {
        /* application proc: direct lookup */
        idx = pmap[proc->vpid].node;
    } else {
        /* daemon: search for the node that hosts it */
        idx = find_daemon_node(proc->vpid);
        if (idx < 0) {
            return NULL;
        }
    }

    entries = (orte_nid_t**)nidmap.addr;

    OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
                         "%s ess:bproc: proc %s is on host %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(proc),
                         entries[idx]->name));

    return entries[idx]->name;
}
/**
 * Return the architecture recorded for the node of a process.
 *
 * If the process belongs to the same job as my daemon, it is treated
 * as a daemon and its node is found by searching the nidmap;
 * otherwise the node is read from the pmap using the proc's vpid.
 *
 * @param proc  name of the process of interest
 * @return the arch stored in the nidmap entry for that node, or 0
 *         if a daemon was requested but not found
 */
static uint32_t proc_get_arch(orte_process_name_t *proc)
{
    orte_nid_t **entries;
    int32_t idx;

    if (ORTE_PROC_MY_DAEMON->jobid != proc->jobid) {
        /* application proc: direct lookup */
        idx = pmap[proc->vpid].node;
    } else {
        /* daemon: search for the node that hosts it */
        idx = find_daemon_node(proc->vpid);
        if (idx < 0) {
            return 0;
        }
    }

    entries = (orte_nid_t**)nidmap.addr;

    OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
                         "%s ess:bproc: proc %s has arch %0x",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(proc),
                         entries[idx]->arch));

    return entries[idx]->arch;
}
/**
 * Record a new architecture value for the node of a process.
 *
 * The node is taken from the pmap entry for the proc's vpid, and
 * the arch field of the matching nidmap entry is overwritten.
 *
 * @param proc  name of the process whose node is updated
 * @param arch  architecture value to store
 * @return ORTE_SUCCESS always
 */
static int update_arch(orte_process_name_t *proc, uint32_t arch)
{
    orte_nid_t **entries = (orte_nid_t**)nidmap.addr;
    int32_t idx = pmap[proc->vpid].node;

    OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
                         "%s ess:bproc: updating proc %s to arch %0x",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(proc),
                         arch));

    entries[idx]->arch = arch;

    return ORTE_SUCCESS;
}
/**
 * Return the local rank stored in the pmap for a process.
 *
 * @param proc  name of the process of interest
 * @return the local_rank field of the pmap entry for proc's vpid
 */
static uint8_t proc_get_local_rank(orte_process_name_t *proc)
{
    orte_pmap_t *entry = &pmap[proc->vpid];

    OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
                         "%s ess:bproc: proc %s has local rank %d",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(proc),
                         (int)entry->local_rank));

    return entry->local_rank;
}
/**
 * Return the node rank stored in the pmap for a process.
 *
 * @param proc  name of the process of interest
 * @return the node_rank field of the pmap entry for proc's vpid
 */
static uint8_t proc_get_node_rank(orte_process_name_t *proc)
{
    orte_pmap_t *entry = &pmap[proc->vpid];

    OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
                         "%s ess:bproc: proc %s has node rank %d",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(proc),
                         (int)entry->node_rank));

    return entry->node_rank;
}
/**
* Sets up the process name from the information put into the environment
* by the bproc launcher and orte_ns_nds_bproc_put.
* by the bproc launcher.
* @retval ORTE_SUCCESS
* @retval error
*/
int orte_sds_bproc_set_name(void)
static int bproc_set_name(void)
{
int rc;
int id;
char* name_string = NULL;
id = mca_base_param_register_string("ns", "nds", "name", NULL, NULL);
mca_base_param_lookup_string(id, &name_string);
if(name_string != NULL) {
if (ORTE_SUCCESS != (rc = orte_ns.convert_string_to_process_name(
&(orte_process_info.my_name),
name_string))) {
ORTE_ERROR_LOG(rc);
free(name_string);
return rc;
}
free(name_string);
} else {
orte_cellid_t cellid;
orte_jobid_t jobid;
orte_vpid_t vpid;
orte_vpid_t vpid_start;
char* cellid_string;
char* jobid_string;
char* vpid_string;
int num_procs;
char *bproc_rank_string;
int bproc_rank;
int stride;
id = mca_base_param_register_string("ns", "nds", "cellid", NULL, NULL);
mca_base_param_lookup_string(id, &cellid_string);
if (NULL == cellid_string) {
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
return ORTE_ERR_NOT_FOUND;
}
if (ORTE_SUCCESS != (rc = orte_ns.convert_string_to_cellid(&cellid, cellid_string))) {
ORTE_ERROR_LOG(rc);
return(rc);
}
id = mca_base_param_register_string("ns", "nds", "jobid", NULL, NULL);
mca_base_param_lookup_string(id, &jobid_string);
if (NULL == jobid_string) {
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
return ORTE_ERR_NOT_FOUND;
}
if (ORTE_SUCCESS != (rc = orte_ns.convert_string_to_jobid(&jobid, jobid_string))) {
ORTE_ERROR_LOG(rc);
return(rc);
}
/* BPROC_RANK is set by bproc when we do a parallel launch */
bproc_rank_string = getenv("BPROC_RANK");
if (NULL == bproc_rank_string) {
opal_output(0, "orte_ns_nds_bproc_get: Error: Environment variable "
"BPROC_RANK not found.\n");
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
return ORTE_ERR_NOT_FOUND;
}
bproc_rank = (int)strtol(bproc_rank_string, NULL, 10);
/* to compute our process name, we need to know two other things: the
* stride (i.e., the size of the step between vpids in this launch
* wave) and the starting vpid of this launch. Get those values here
*/
id = mca_base_param_register_int("pls", "bproc", "stride", NULL, -1);
mca_base_param_lookup_int(id, &stride);
if (stride < 0) {
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
return ORTE_ERR_NOT_FOUND;
}
id = mca_base_param_register_string("ns", "nds", "vpid_start", NULL, NULL);
mca_base_param_lookup_string(id, &vpid_string);
if (NULL == vpid_string) {
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
return ORTE_ERR_NOT_FOUND;
}
rc = orte_ns.convert_string_to_vpid(&vpid_start, vpid_string);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
return(rc);
}
/* compute our vpid */
vpid = vpid_start + (bproc_rank * stride);
/* create our name */
if (ORTE_SUCCESS != (rc = orte_ns.create_process_name(
&(orte_process_info.my_name),
cellid,
jobid,
vpid))) {
ORTE_ERROR_LOG(rc);
return rc;
}
id = mca_base_param_register_int("ns", "nds", "num_procs", NULL, -1);
mca_base_param_lookup_int(id, &num_procs);
if (num_procs < 0) {
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
return ORTE_ERR_NOT_FOUND;
}
orte_process_info.num_procs = (size_t)num_procs;
id = mca_base_param_register_string("ns", "nds", "global_vpid_start", NULL, NULL);
mca_base_param_lookup_string(id, &vpid_string);
if (NULL == vpid_string) {
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
return ORTE_ERR_NOT_FOUND;
}
rc = orte_ns.convert_string_to_vpid(&orte_process_info.vpid_start, vpid_string);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
return(rc);
}
if(NULL != orte_system_info.nodename)
free(orte_system_info.nodename);
asprintf(&orte_system_info.nodename, "%d", bproc_currnode());
char *jobid_str, *vpid_string, *bproc_rank_string;
orte_jobid_t jobid;
orte_vpid_t vpid_start;
int stride, bproc_rank;
id = mca_base_param_register_string("orte", "ess", "jobid", NULL, NULL);
mca_base_param_lookup_string(id, &jobid_str);
if (NULL == jobid_str) {
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
return ORTE_ERR_NOT_FOUND;
}
return ORTE_SUCCESS;
}
int
orte_sds_bproc_finalize(void)
{
if (ORTE_SUCCESS != (rc = orte_util_convert_string_to_jobid(&jobid, jobid_str))) {
ORTE_ERROR_LOG(rc);
return(rc);
}
free(jobid_str);
ORTE_PROC_MY_NAME->jobid = jobid;
/* BPROC_RANK is set by bproc when we do a parallel launch */
bproc_rank_string = getenv("BPROC_RANK");
if (NULL == bproc_rank_string) {
opal_output(0, "ess:bproc_get: Error: Environment variable "
"BPROC_RANK not found.\n");
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
return ORTE_ERR_NOT_FOUND;
}
bproc_rank = (int)strtol(bproc_rank_string, NULL, 10);
/* to compute our process name, we need to know two other things: the
* stride (i.e., the size of the step between vpids in this launch
* wave) and the starting vpid of this launch. Get those values here
*/
id = mca_base_param_register_int("orte", "ess_bproc", "stride", NULL, -1);
mca_base_param_lookup_int(id, &stride);
if (stride < 0) {
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
return ORTE_ERR_NOT_FOUND;
}
id = mca_base_param_register_string("orte", "ess_bproc", "vpid_start", NULL, NULL);
mca_base_param_lookup_string(id, &vpid_string);
if (NULL == vpid_string) {
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
return ORTE_ERR_NOT_FOUND;
}
rc = orte_util_convert_string_to_vpid(&vpid_start, vpid_string);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
return(rc);
}
/* compute our vpid */
ORTE_PROC_MY_NAME->vpid = vpid_start + (bproc_rank * stride);
if(NULL != orte_process_info.nodename) {
free(orte_process_info.nodename);
}
asprintf(&orte_process_info.nodename, "%d", bproc_currnode());
return ORTE_SUCCESS;
}

44
orte/mca/ess/env/ess_env_module.c поставляемый
Просмотреть файл

@ -240,12 +240,44 @@ static bool proc_is_local(orte_process_name_t *proc)
}
/* A daemon's vpid cannot be used directly as an index into the
 * node array, because some nodes may not host a daemon. The
 * array therefore has to be scanned for the entry that records
 * the given daemon. This lookup is expected to be rare.
 *
 * Returns the nidmap index of the first entry whose daemon field
 * equals vpid, or -1 if the scan reaches an empty slot or the end
 * of the array without a match.
 */
static int32_t find_daemon_node(orte_vpid_t vpid)
{
    orte_nid_t **entries = (orte_nid_t**)nidmap.addr;
    int32_t idx = 0;

    while (idx < nidmap.size && NULL != entries[idx]) {
        if (entries[idx]->daemon == vpid) {
            return idx;
        }
        ++idx;
    }

    return -1;
}
static char* proc_get_hostname(orte_process_name_t *proc)
{
int32_t node;
orte_nid_t **nids;
node = pmap[proc->vpid].node;
if (ORTE_PROC_MY_DAEMON->jobid == proc->jobid) {
/* looking for the daemon's hostname */
node = find_daemon_node(proc->vpid);
if (0 > node) {
return NULL;
}
} else {
node = pmap[proc->vpid].node;
}
nids = (orte_nid_t**)nidmap.addr;
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
@ -262,7 +294,15 @@ static uint32_t proc_get_arch(orte_process_name_t *proc)
int32_t node;
orte_nid_t **nids;
node = pmap[proc->vpid].node;
if (ORTE_PROC_MY_DAEMON->jobid == proc->jobid) {
/* looking for the daemon's arch */
node = find_daemon_node(proc->vpid);
if (0 > node) {
return 0;
}
} else {
node = pmap[proc->vpid].node;
}
nids = (orte_nid_t**)nidmap.addr;
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,

Просмотреть файл

@ -419,9 +419,11 @@ static int rte_init(char flags)
return ORTE_SUCCESS;
error:
orte_show_help("help-orte-runtime.txt",
"orte_init:startup:internal-failure",
true, error, ORTE_ERROR_NAME(ret), ret);
if (ORTE_ERR_SILENT != ret) {
orte_show_help("help-orte-runtime.txt",
"orte_init:startup:internal-failure",
true, error, ORTE_ERROR_NAME(ret), ret);
}
return ret;
}

Просмотреть файл

@ -197,12 +197,44 @@ static bool proc_is_local(orte_process_name_t *proc)
}
/* A daemon's vpid cannot be used directly as an index into the
 * node array, because some nodes may not host a daemon. The
 * array therefore has to be scanned for the entry that records
 * the given daemon. This lookup is expected to be rare.
 *
 * Returns the nidmap index of the first entry whose daemon field
 * equals vpid, or -1 if the scan reaches an empty slot or the end
 * of the array without a match.
 */
static int32_t find_daemon_node(orte_vpid_t vpid)
{
    orte_nid_t **entries = (orte_nid_t**)nidmap.addr;
    int32_t idx = 0;

    while (idx < nidmap.size && NULL != entries[idx]) {
        if (entries[idx]->daemon == vpid) {
            return idx;
        }
        ++idx;
    }

    return -1;
}
static char* proc_get_hostname(orte_process_name_t *proc)
{
int32_t node;
orte_nid_t **nids;
node = pmap[proc->vpid].node;
if (ORTE_PROC_MY_DAEMON->jobid == proc->jobid) {
/* looking for the daemon's hostname */
node = find_daemon_node(proc->vpid);
if (0 > node) {
return NULL;
}
} else {
node = pmap[proc->vpid].node;
}
nids = (orte_nid_t**)nidmap.addr;
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
@ -219,7 +251,15 @@ static uint32_t proc_get_arch(orte_process_name_t *proc)
int32_t node;
orte_nid_t **nids;
node = pmap[proc->vpid].node;
if (ORTE_PROC_MY_DAEMON->jobid == proc->jobid) {
/* looking for the daemon's arch */
node = find_daemon_node(proc->vpid);
if (0 > node) {
return 0;
}
} else {
node = pmap[proc->vpid].node;
}
nids = (orte_nid_t**)nidmap.addr;
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,

Просмотреть файл

@ -421,12 +421,44 @@ static bool proc_is_local(orte_process_name_t *proc)
}
/* A daemon's vpid cannot be used directly as an index into the
 * node array, because some nodes may not host a daemon. The
 * array therefore has to be scanned for the entry that records
 * the given daemon. This lookup is expected to be rare.
 *
 * Returns the nidmap index of the first entry whose daemon field
 * equals vpid, or -1 if the scan reaches an empty slot or the end
 * of the array without a match.
 */
static int32_t find_daemon_node(orte_vpid_t vpid)
{
    orte_nid_t **entries = (orte_nid_t**)nidmap.addr;
    int32_t idx = 0;

    while (idx < nidmap.size && NULL != entries[idx]) {
        if (entries[idx]->daemon == vpid) {
            return idx;
        }
        ++idx;
    }

    return -1;
}
/**
 * Return the name of the host on which a process resides.
 *
 * If the process belongs to the same job as my daemon, it is treated
 * as a daemon and its node is found by searching the nidmap;
 * otherwise the node is read from the pmap using the proc's vpid.
 *
 * @param proc  name of the process of interest
 * @return the name stored in the nidmap entry for that node (not a
 *         copy), or NULL if a daemon was requested but not found
 */
static char* proc_get_hostname(orte_process_name_t *proc)
{
    int32_t node;
    orte_nid_t **nids;

    /* Only consult the pmap for non-daemon procs: the pmap is
     * indexed by the proc's vpid, and a daemon's vpid must go
     * through the nidmap search instead.
     */
    if (ORTE_PROC_MY_DAEMON->jobid == proc->jobid) {
        /* looking for the daemon's hostname */
        node = find_daemon_node(proc->vpid);
        if (0 > node) {
            return NULL;
        }
    } else {
        node = pmap[proc->vpid].node;
    }

    nids = (orte_nid_t**)nidmap.addr;
    return nids[node]->name;
}
@ -436,7 +468,15 @@ static uint32_t proc_get_arch(orte_process_name_t *proc)
int32_t node;
orte_nid_t **nids;
node = pmap[proc->vpid].node;
if (ORTE_PROC_MY_DAEMON->jobid == proc->jobid) {
/* looking for the daemon's arch */
node = find_daemon_node(proc->vpid);
if (0 > node) {
return 0;
}
} else {
node = pmap[proc->vpid].node;
}
nids = (orte_nid_t**)nidmap.addr;
return nids[node]->arch;
}

Просмотреть файл

@ -198,12 +198,44 @@ static bool proc_is_local(orte_process_name_t *proc)
}
/* A daemon's vpid cannot be used directly as an index into the
 * node array, because some nodes may not host a daemon. The
 * array therefore has to be scanned for the entry that records
 * the given daemon. This lookup is expected to be rare.
 *
 * Returns the nidmap index of the first entry whose daemon field
 * equals vpid, or -1 if the scan reaches an empty slot or the end
 * of the array without a match.
 */
static int32_t find_daemon_node(orte_vpid_t vpid)
{
    orte_nid_t **entries = (orte_nid_t**)nidmap.addr;
    int32_t idx = 0;

    while (idx < nidmap.size && NULL != entries[idx]) {
        if (entries[idx]->daemon == vpid) {
            return idx;
        }
        ++idx;
    }

    return -1;
}
static char* proc_get_hostname(orte_process_name_t *proc)
{
int32_t node;
orte_nid_t **nids;
node = pmap[proc->vpid].node;
if (ORTE_PROC_MY_DAEMON->jobid == proc->jobid) {
/* looking for the daemon's hostname */
node = find_daemon_node(proc->vpid);
if (0 > node) {
return NULL;
}
} else {
node = pmap[proc->vpid].node;
}
nids = (orte_nid_t**)nidmap.addr;
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,
@ -220,7 +252,15 @@ static uint32_t proc_get_arch(orte_process_name_t *proc)
int32_t node;
orte_nid_t **nids;
node = pmap[proc->vpid].node;
if (ORTE_PROC_MY_DAEMON->jobid == proc->jobid) {
/* looking for the daemon's arch */
node = find_daemon_node(proc->vpid);
if (0 > node) {
return 0;
}
} else {
node = pmap[proc->vpid].node;
}
nids = (orte_nid_t**)nidmap.addr;
OPAL_OUTPUT_VERBOSE((2, orte_ess_base_output,