1
1
openmpi/orte/mca/ras/localhost/ras_localhost_module.c
Jeff Squyres 0629cdc2d7 Bring back the changes from /tmp/jjhursey-rmaps. Specific merge
command:

svn merge -r 7567:7663 https://svn.open-mpi.org/svn/ompi/tmp/jjhursey-rmaps .

(where "." is a trunk checkout)

The logs from this branch are much more descriptive than I will put
here (including a *really* long description from last night).  Here's
the short version:

- fixed some broken implementations in ras and rmaps
- "orterun --host ..." now works and has clearly defined semantics
  (this was the impetus for the branch and all these fixes -- LANL had
  a requirement for --host to work for 1.0)
- there is still a little bit of cleanup left to do post-1.0 (we got
  correct functionality for 1.0 -- we did not fix bad implementations
  that still "work")
  - rds/hostfile and ras/hostfile handshaking
  - singleton node segment assignments in stage1
  - remove the default hostfile (no need for it anymore with the
    localhost ras component)
  - clean up pls components to avoid duplicate ras mapping queries
  - [possible] -bynode/-byslot being specific to a single app context 

This commit was SVN r7664.
2005-10-07 22:24:52 +00:00

137 строки
3.6 KiB
C

/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "opal/class/opal_list.h"
#include "opal/util/output.h"
#include "orte/include/orte_constants.h"
#include "orte/include/orte_types.h"
#include "orte/mca/ras/base/base.h"
#include "orte/mca/ras/base/ras_base_node.h"
#include "orte/mca/rmgr/base/base.h"
#include "orte/mca/ras/base/ras_base_node.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/ras/localhost/ras_localhost.h"
/*
* Local functions
*
* Forward declarations of the file-private entry points that are placed in
* the orte_ras_localhost_module table below and defined later in this file.
*/
static int orte_ras_localhost_allocate(orte_jobid_t jobid);
static int orte_ras_localhost_deallocate(orte_jobid_t jobid);
static int orte_ras_localhost_finalize(void);
/*
* Module instance handed out by orte_ras_localhost_init().
*
* Despite the historical "local variables" heading, this object is not
* declared static, so it has external linkage.
*
* The allocate, deallocate and finalize entries are implemented in this
* file; the two entries in between reference the orte_ras_base_node_insert
* and orte_ras_base_node_query functions directly.
*
* NOTE(review): the initializer is positional, so the entry order presumably
* has to match the member order of orte_ras_base_module_t -- confirm against
* that type's declaration before reordering.
*/
orte_ras_base_module_t orte_ras_localhost_module = {
orte_ras_localhost_allocate,
orte_ras_base_node_insert,
orte_ras_base_node_query,
orte_ras_localhost_deallocate,
orte_ras_localhost_finalize
};
/*
 * Report the localhost component's priority and hand back its module.
 *
 * @param priority  Output: receives mca_ras_localhost_component.priority.
 *                  It is dereferenced unconditionally.
 * @return Address of orte_ras_localhost_module.
 */
orte_ras_base_module_t *orte_ras_localhost_init(int* priority)
{
    orte_ras_base_module_t *module = &orte_ras_localhost_module;

    *priority = mca_ras_localhost_component.priority;

    return module;
}
/*
 * Allocate a single "localhost" node to a job when the node segment is
 * empty.
 *
 * If the node segment already holds entries, nothing is changed and
 * ORTE_SUCCESS is returned.  Otherwise a node named "localhost" with one
 * slot is created, inserted into the node segment, and allocated to the
 * job.
 *
 * @param jobid  Job to which the localhost node is allocated.
 * @return ORTE_SUCCESS if the segment was not empty or the node was inserted
 *         and allocated; ORTE_ERR_OUT_OF_RESOURCE if the node object or its
 *         name could not be allocated; otherwise the error code returned by
 *         the failing orte_ras_base_* call.
 */
static int orte_ras_localhost_allocate(orte_jobid_t jobid)
{
    bool empty;
    int ret;
    opal_list_t nodes;
    orte_ras_node_t *node;
    opal_list_item_t *item;

    /* If the node segment is not empty, do nothing */
    if (ORTE_SUCCESS != (ret = orte_ras_base_node_segment_empty(&empty))) {
        ORTE_ERROR_LOG(ret);
        return ret;
    }
    if (!empty) {
        opal_output(orte_ras_base.ras_output,
                    "orte:ras:localhost: node segment not empty; not doing anything");
        return ORTE_SUCCESS;
    }

    opal_output(orte_ras_base.ras_output,
                "orte:ras:localhost: node segment empty; adding \"localhost\"");

    /* Ok, the node segment is empty -- so add a localhost node */
    node = OBJ_NEW(orte_ras_node_t);
    if (NULL == node) {
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        return ORTE_ERR_OUT_OF_RESOURCE;
    }

    /* strdup() can fail; never publish a node whose name is NULL */
    node->node_name = strdup("localhost");
    if (NULL == node->node_name) {
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        OBJ_RELEASE(node);
        return ORTE_ERR_OUT_OF_RESOURCE;
    }
    node->node_arch = NULL;
    node->node_state = ORTE_NODE_STATE_UP;
    /* JMS: this should not be hard-wired to 0, but there's no
       other value to put it to [yet]... */
    node->node_cellid = 0;
    node->node_slots_inuse = 0;
    node->node_slots_max = 0;
    node->node_slots = 1;

    OBJ_CONSTRUCT(&nodes, opal_list_t);
    opal_list_append(&nodes, &node->super);

    /* Put it on the segment and allocate it; allocation is only attempted
       when the insert succeeded, and any failure is logged like the
       segment-empty check above */
    ret = orte_ras_base_node_insert(&nodes);
    if (ORTE_SUCCESS == ret) {
        ret = orte_ras_base_allocate_nodes(jobid, &nodes);
    }
    if (ORTE_SUCCESS != ret) {
        ORTE_ERROR_LOG(ret);
    }

    /* Drain whatever is still on the temporary list.  Looping (rather than
       releasing exactly one item) avoids handing a NULL pointer to
       OBJ_RELEASE should the list turn out to be empty. */
    while (NULL != (item = opal_list_remove_first(&nodes))) {
        OBJ_RELEASE(item);
    }
    OBJ_DESTRUCT(&nodes);

    /* All done */
    return ret;
}
/*
 * Deallocate entry point of the localhost module.
 *
 * The jobid argument is not used: the function only writes a trace message
 * to the ras output stream.
 *
 * @param jobid  Job being deallocated (ignored).
 * @return Always ORTE_SUCCESS.
 */
static int orte_ras_localhost_deallocate(orte_jobid_t jobid)
{
    const int rc = ORTE_SUCCESS;

    /* No work is performed here beyond the trace message */
    opal_output(orte_ras_base.ras_output,
                "ras:localhost:deallocate: success (nothing to do)");

    return rc;
}
/*
 * Finalize entry point of the localhost module.
 *
 * The function only writes a trace message to the ras output stream.
 *
 * @return Always ORTE_SUCCESS.
 */
static int orte_ras_localhost_finalize(void)
{
    const int rc = ORTE_SUCCESS;

    /* No work is performed here beyond the trace message */
    opal_output(orte_ras_base.ras_output,
                "ras:localhost:finalize: success (nothing to do)");

    return rc;
}