1
1

Update the nidmap functions to include a new lookup_jmap entry, and to initialize the nidmap and pidmap for startup.

Have the singleton ess module use the new capability.

Adjust a comment in ess_base_put

This commit was SVN r20464.
Этот коммит содержится в:
Ralph Castain 2009-02-06 15:28:32 +00:00
родитель c5b637418b
Коммит e2a8f45fba
4 изменённых файлов: 52 добавлений и 56 удалений

Просмотреть файл

@ -40,7 +40,7 @@ int orte_ess_env_put(orte_std_cntr_t num_procs,
char* param;
char* value;
/* tell the SDS to select the env component */
/* tell the ESS to select the env component */
if(NULL == (param = mca_base_param_environ_variable("ess",NULL,NULL))) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;

Просмотреть файл

@ -97,9 +97,6 @@ orte_ess_base_module_t orte_ess_singleton_module = {
static int rte_init(char flags)
{
int rc;
orte_nid_t *node;
orte_jmap_t *jmap;
orte_pmap_t pmap;
/* run the prolog */
if (ORTE_SUCCESS != (rc = orte_ess_base_std_prolog())) {
@ -160,23 +157,6 @@ static int rte_init(char flags)
return rc;
}
/* add a jmap entry for myself */
jmap = OBJ_NEW(orte_jmap_t);
jmap->job = ORTE_PROC_MY_NAME->jobid;
opal_pointer_array_add(&orte_jobmap, jmap);
pmap.local_rank = 0;
pmap.node_rank = 0;
pmap.node = 0;
opal_value_array_set_item(&jmap->pmap, 0, &pmap);
jmap->num_procs = 1;
/* create a nidmap entry for this node */
node = OBJ_NEW(orte_nid_t);
node->name = strdup(orte_process_info.nodename);
node->daemon = 0; /* the HNP co-occupies our node */
node->arch = orte_process_info.arch;
opal_pointer_array_set_item(&orte_nidmap, 0, node);
/* use the std app init to complete the procedure */
if (ORTE_SUCCESS != (rc = orte_ess_base_app_setup())) {
ORTE_ERROR_LOG(rc);

Просмотреть файл

@ -30,6 +30,7 @@
#include "opal/dss/dss.h"
#include "opal/runtime/opal.h"
#include "opal/class/opal_pointer_array.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/util/show_help.h"
@ -46,6 +47,9 @@ int orte_util_nidmap_init(opal_buffer_t *buffer)
int32_t cnt;
int rc;
opal_byte_object_t *bo;
orte_nid_t *node;
orte_jmap_t *jmap;
orte_pmap_t pmap;
if (!initialized) {
/* need to construct the global arrays */
@ -61,8 +65,30 @@ int orte_util_nidmap_init(opal_buffer_t *buffer)
initialized = true;
}
/* it is okay if the buffer is empty - could be a non-MPI proc */
/* it is okay if the buffer is empty */
if (NULL == buffer || 0 == buffer->bytes_used) {
/* if the buffer is empty, add a jmap entry for myself */
jmap = OBJ_NEW(orte_jmap_t);
jmap->job = ORTE_PROC_MY_NAME->jobid;
opal_pointer_array_add(&orte_jobmap, jmap);
jmap->num_procs = 1;
/* create a nidmap entry for this node */
node = OBJ_NEW(orte_nid_t);
node->name = strdup(orte_process_info.nodename);
node->daemon = ORTE_PROC_MY_DAEMON->vpid;
node->arch = orte_process_info.arch;
OBJ_CONSTRUCT(&pmap, orte_pmap_t);
pmap.local_rank = 0;
pmap.node_rank = 0;
pmap.node = opal_pointer_array_add(&orte_nidmap, node);
/* value array copies values, so everything must be set before
* calling the set_item function
*/
opal_value_array_set_item(&jmap->pmap, ORTE_PROC_MY_NAME->vpid, &pmap);
OBJ_DESTRUCT(&pmap);
/* all done */
return ORTE_SUCCESS;
}
@ -971,27 +997,40 @@ cleanup:
/*** NIDMAP UTILITIES ***/
orte_pmap_t* orte_util_lookup_pmap(orte_process_name_t *proc)
orte_jmap_t* orte_util_lookup_jmap(orte_jobid_t job)
{
int i;
orte_jmap_t **jmaps;
orte_pmap_t *pmap;
jmaps = (orte_jmap_t**)orte_jobmap.addr;
for (i=0; i < orte_jobmap.size && NULL != jmaps[i]; i++) {
OPAL_OUTPUT_VERBOSE((10, orte_debug_output,
"%s lookup:pmap: checking job %s for job %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_JOBID_PRINT(jmaps[i]->job), ORTE_JOBID_PRINT(proc->jobid)));
if (proc->jobid == jmaps[i]->job) {
pmap = (orte_pmap_t*)opal_value_array_get_item(&jmaps[i]->pmap, proc->vpid);
return pmap;
ORTE_JOBID_PRINT(jmaps[i]->job), ORTE_JOBID_PRINT(job)));
if (job == jmaps[i]->job) {
return jmaps[i];
}
}
/* if we didn't find it, return NULL */
return NULL;
}
orte_pmap_t* orte_util_lookup_pmap(orte_process_name_t *proc)
{
orte_jmap_t *jmap;
if (NULL == (jmap = orte_util_lookup_jmap(proc->jobid))) {
return NULL;
}
if (proc->vpid >= jmap->num_procs) {
return NULL;
}
return (orte_pmap_t*)opal_value_array_get_item(&jmap->pmap, proc->vpid);
}
/* the daemon's vpid does not necessarily correlate
* to the node's index in the node array since
* some nodes may not have a daemon on them. Thus,
@ -1014,12 +1053,13 @@ static orte_nid_t* find_daemon_node(orte_process_name_t *proc)
}
}
/* if we didn't find it, return NULL */
return NULL;
}
orte_nid_t* orte_util_lookup_nid(orte_process_name_t *proc)
{
orte_nid_t **nids, *nid;
orte_nid_t **nids;
orte_pmap_t *pmap;
OPAL_OUTPUT_VERBOSE((5, orte_debug_output,
@ -1027,38 +1067,13 @@ orte_nid_t* orte_util_lookup_nid(orte_process_name_t *proc)
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(proc)));
/* if the proc is from a different job family, we always
* return NULL - we cannot know info for procs in other
* job families.
*/
if (ORTE_JOB_FAMILY(proc->jobid) !=
ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid)) {
/* this isn't an error - let the caller decide if an
* error message is required
*/
OPAL_OUTPUT_VERBOSE((5, orte_debug_output,
"%s lookup:nid: different job family",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
return NULL;
}
if (ORTE_PROC_IS_DAEMON(proc->jobid)) {
/* looking for a daemon in my family */
if (NULL == (nid = find_daemon_node(proc))) {
OPAL_OUTPUT_VERBOSE((5, orte_debug_output,
"%s lookup:nid: couldn't find daemon node",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
}
return nid;
/* looking for a daemon */
return find_daemon_node(proc);
}
/* looking for an application proc */
if (NULL == (pmap = orte_util_lookup_pmap(proc))) {
/* if the proc is in my job family, then this definitely is
* an error - we should always know the node of a proc
* in our job family
*/
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
return NULL;
}

Просмотреть файл

@ -45,6 +45,7 @@ BEGIN_C_DECLS
ORTE_DECLSPEC int orte_util_nidmap_init(opal_buffer_t *buffer);
ORTE_DECLSPEC void orte_util_nidmap_finalize(void);
ORTE_DECLSPEC orte_jmap_t* orte_util_lookup_jmap(orte_jobid_t job);
ORTE_DECLSPEC orte_pmap_t* orte_util_lookup_pmap(orte_process_name_t *proc);
ORTE_DECLSPEC orte_nid_t* orte_util_lookup_nid(orte_process_name_t *proc);