From e2a8f45fbaf719a5e53dd464f74538cbb2277433 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Fri, 6 Feb 2009 15:28:32 +0000 Subject: [PATCH] Update the nidmap functions to include a new lookup_jmap entry, and to initialize the nidmap and pidmap for startup. Have the singleton ess module use the new capability. Adjust a comment in ess_base_put This commit was SVN r20464. --- orte/mca/ess/base/ess_base_put.c | 2 +- orte/mca/ess/singleton/ess_singleton_module.c | 20 ----- orte/util/nidmap.c | 85 +++++++++++-------- orte/util/nidmap.h | 1 + 4 files changed, 52 insertions(+), 56 deletions(-) diff --git a/orte/mca/ess/base/ess_base_put.c b/orte/mca/ess/base/ess_base_put.c index 1d64eabbae..4e43b82fc2 100644 --- a/orte/mca/ess/base/ess_base_put.c +++ b/orte/mca/ess/base/ess_base_put.c @@ -40,7 +40,7 @@ int orte_ess_env_put(orte_std_cntr_t num_procs, char* param; char* value; - /* tell the SDS to select the env component */ + /* tell the ESS to select the env component */ if(NULL == (param = mca_base_param_environ_variable("ess",NULL,NULL))) { ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); return ORTE_ERR_OUT_OF_RESOURCE; diff --git a/orte/mca/ess/singleton/ess_singleton_module.c b/orte/mca/ess/singleton/ess_singleton_module.c index 394d0cd336..8f5a1e85cc 100644 --- a/orte/mca/ess/singleton/ess_singleton_module.c +++ b/orte/mca/ess/singleton/ess_singleton_module.c @@ -97,9 +97,6 @@ orte_ess_base_module_t orte_ess_singleton_module = { static int rte_init(char flags) { int rc; - orte_nid_t *node; - orte_jmap_t *jmap; - orte_pmap_t pmap; /* run the prolog */ if (ORTE_SUCCESS != (rc = orte_ess_base_std_prolog())) { @@ -160,23 +157,6 @@ static int rte_init(char flags) return rc; } - /* add a jmap entry for myself */ - jmap = OBJ_NEW(orte_jmap_t); - jmap->job = ORTE_PROC_MY_NAME->jobid; - opal_pointer_array_add(&orte_jobmap, jmap); - pmap.local_rank = 0; - pmap.node_rank = 0; - pmap.node = 0; - opal_value_array_set_item(&jmap->pmap, 0, &pmap); - jmap->num_procs = 1; - - /* create a nidmap entry for this node */ - node = OBJ_NEW(orte_nid_t); - node->name = strdup(orte_process_info.nodename); - node->daemon = 0; /* the HNP co-occupies our node */ - node->arch = orte_process_info.arch; - opal_pointer_array_set_item(&orte_nidmap, 0, node); - /* use the std app init to complete the procedure */ if (ORTE_SUCCESS != (rc = orte_ess_base_app_setup())) { ORTE_ERROR_LOG(rc); diff --git a/orte/util/nidmap.c b/orte/util/nidmap.c index 4f9530637f..66613e0d29 100644 --- a/orte/util/nidmap.c +++ b/orte/util/nidmap.c @@ -30,6 +30,7 @@ #include "opal/dss/dss.h" #include "opal/runtime/opal.h" +#include "opal/class/opal_pointer_array.h" #include "orte/mca/errmgr/errmgr.h" #include "orte/util/show_help.h" @@ -46,6 +47,9 @@ int orte_util_nidmap_init(opal_buffer_t *buffer) int32_t cnt; int rc; opal_byte_object_t *bo; + orte_nid_t *node; + orte_jmap_t *jmap; + orte_pmap_t pmap; if (!initialized) { /* need to construct the global arrays */ @@ -61,8 +65,30 @@ int orte_util_nidmap_init(opal_buffer_t *buffer) initialized = true; } - /* it is okay if the buffer is empty - could be a non-MPI proc */ + /* it is okay if the buffer is empty */ if (NULL == buffer || 0 == buffer->bytes_used) { + /* if the buffer is empty, add a jmap entry for myself */ + jmap = OBJ_NEW(orte_jmap_t); + jmap->job = ORTE_PROC_MY_NAME->jobid; + opal_pointer_array_add(&orte_jobmap, jmap); + jmap->num_procs = 1; + + /* create a nidmap entry for this node */ + node = OBJ_NEW(orte_nid_t); + node->name = strdup(orte_process_info.nodename); + node->daemon = ORTE_PROC_MY_DAEMON->vpid; + node->arch = orte_process_info.arch; + OBJ_CONSTRUCT(&pmap, orte_pmap_t); + pmap.local_rank = 0; + pmap.node_rank = 0; + pmap.node = opal_pointer_array_add(&orte_nidmap, node); + /* value array copies values, so everything must be set before + * calling the set_item function + */ + opal_value_array_set_item(&jmap->pmap, ORTE_PROC_MY_NAME->vpid, &pmap); + OBJ_DESTRUCT(&pmap); + + /* all done */ return ORTE_SUCCESS; } @@ -971,27 +997,40 @@ cleanup: /*** NIDMAP UTILITIES ***/ -orte_pmap_t* orte_util_lookup_pmap(orte_process_name_t *proc) +orte_jmap_t* orte_util_lookup_jmap(orte_jobid_t job) { int i; orte_jmap_t **jmaps; - orte_pmap_t *pmap; jmaps = (orte_jmap_t**)orte_jobmap.addr; for (i=0; i < orte_jobmap.size && NULL != jmaps[i]; i++) { OPAL_OUTPUT_VERBOSE((10, orte_debug_output, "%s lookup:pmap: checking job %s for job %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_JOBID_PRINT(jmaps[i]->job), ORTE_JOBID_PRINT(proc->jobid))); - if (proc->jobid == jmaps[i]->job) { - pmap = (orte_pmap_t*)opal_value_array_get_item(&jmaps[i]->pmap, proc->vpid); - return pmap; + ORTE_JOBID_PRINT(jmaps[i]->job), ORTE_JOBID_PRINT(job))); + if (job == jmaps[i]->job) { + return jmaps[i]; } } + /* if we didn't find it, return NULL */ return NULL; } +orte_pmap_t* orte_util_lookup_pmap(orte_process_name_t *proc) +{ + orte_jmap_t *jmap; + + if (NULL == (jmap = orte_util_lookup_jmap(proc->jobid))) { + return NULL; + } + if (proc->vpid >= jmap->num_procs) { + return NULL; + } + + return (orte_pmap_t*)opal_value_array_get_item(&jmap->pmap, proc->vpid); +} + /* the daemon's vpid does not necessarily correlate * to the node's index in the node array since * some nodes may not have a daemon on them. Thus, @@ -1014,12 +1053,13 @@ static orte_nid_t* find_daemon_node(orte_process_name_t *proc) } } + /* if we didn't find it, return NULL */ return NULL; } orte_nid_t* orte_util_lookup_nid(orte_process_name_t *proc) { - orte_nid_t **nids, *nid; + orte_nid_t **nids; orte_pmap_t *pmap; OPAL_OUTPUT_VERBOSE((5, orte_debug_output, @@ -1027,38 +1067,13 @@ orte_nid_t* orte_util_lookup_nid(orte_process_name_t *proc) ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(proc))); - /* if the proc is from a different job family, we always - * return NULL - we cannot know info for procs in other - * job families. - */ - if (ORTE_JOB_FAMILY(proc->jobid) != - ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid)) { - /* this isn't an error - let the caller decide if an - * error message is required - */ - OPAL_OUTPUT_VERBOSE((5, orte_debug_output, - "%s lookup:nid: different job family", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - return NULL; - } - if (ORTE_PROC_IS_DAEMON(proc->jobid)) { - /* looking for a daemon in my family */ - if (NULL == (nid = find_daemon_node(proc))) { - OPAL_OUTPUT_VERBOSE((5, orte_debug_output, - "%s lookup:nid: couldn't find daemon node", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - } - return nid; + /* looking for a daemon */ + return find_daemon_node(proc); } /* looking for an application proc */ if (NULL == (pmap = orte_util_lookup_pmap(proc))) { - /* if the proc is in my job family, then this definitely is - * an error - we should always know the node of a proc - * in our job family - */ - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); return NULL; } diff --git a/orte/util/nidmap.h b/orte/util/nidmap.h index 3e2b3af7d3..262c93c282 100644 --- a/orte/util/nidmap.h +++ b/orte/util/nidmap.h @@ -45,6 +45,7 @@ BEGIN_C_DECLS ORTE_DECLSPEC int orte_util_nidmap_init(opal_buffer_t *buffer); ORTE_DECLSPEC void orte_util_nidmap_finalize(void); +ORTE_DECLSPEC orte_jmap_t* orte_util_lookup_jmap(orte_jobid_t job); ORTE_DECLSPEC orte_pmap_t* orte_util_lookup_pmap(orte_process_name_t *proc); ORTE_DECLSPEC orte_nid_t* orte_util_lookup_nid(orte_process_name_t *proc);