0629cdc2d7
command: svn merge -r 7567:7663 https://svn.open-mpi.org/svn/ompi/tmp/jjhursey-rmaps . (where "." is a trunk checkout) The logs from this branch are much more descriptive than I will put here (including a *really* long description from last night). Here's the short version: - fixed some broken implementations in ras and rmaps - "orterun --host ..." now works and has clearly defined semantics (this was the impetus for the branch and all these fixes -- LANL had a requirement for --host to work for 1.0) - there is still a little bit of cleanup left to do post-1.0 (we got correct functionality for 1.0 -- we did not fix bad implementations that still "work") - rds/hostfile and ras/hostfile handshaking - singleton node segment assignments in stage1 - remove the default hostfile (no need for it anymore with the localhost ras component) - clean up pls components to avoid duplicate ras mapping queries - [possible] -bynode/-byslot being specific to a single app context This commit was SVN r7664.
299 строки
7.9 KiB
C
299 строки
7.9 KiB
C
/*
|
|
* Copyright (c) 2004-2005 The Trustees of Indiana University.
|
|
* All rights reserved.
|
|
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
|
|
* All rights reserved.
|
|
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
|
* University of Stuttgart. All rights reserved.
|
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
|
* All rights reserved.
|
|
* $COPYRIGHT$
|
|
*
|
|
* Additional copyrights may follow
|
|
*
|
|
* $HEADER$
|
|
*/
|
|
#include "orte_config.h"

#include <errno.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <sys/bproc.h>

#include "include/orte_constants.h"
#include "include/orte_types.h"
#include "opal/util/argv.h"
#include "opal/util/output.h"
#include "mca/rmgr/base/base.h"
#include "mca/ras/base/base.h"
#include "mca/ras/base/ras_base_node.h"
#include "mca/errmgr/errmgr.h"
#include "ras_bjs.h"
|
|
|
|
|
|
/**
|
|
* Query the bproc node status
|
|
*/
|
|
|
|
static int orte_ras_bjs_node_state(int node)
|
|
{
|
|
#if defined BPROC_API_VERSION && BPROC_API_VERSION >= 4
|
|
char nodestatus[BPROC_STATE_LEN + 1];
|
|
|
|
bproc_nodestatus(node, nodestatus, sizeof(nodestatus));
|
|
if (strcmp(nodestatus, "up") == 0)
|
|
return ORTE_NODE_STATE_UP;
|
|
if (strcmp(nodestatus, "down") == 0)
|
|
return ORTE_NODE_STATE_DOWN;
|
|
if (strcmp(nodestatus, "boot") == 0)
|
|
return ORTE_NODE_STATE_REBOOT;
|
|
return ORTE_NODE_STATE_UNKNOWN;
|
|
#else
|
|
switch(bproc_nodestatus(node)) {
|
|
case bproc_node_up:
|
|
return ORTE_NODE_STATE_UP;
|
|
case bproc_node_down:
|
|
return ORTE_NODE_STATE_DOWN;
|
|
case bproc_node_boot:
|
|
return ORTE_NODE_STATE_REBOOT;
|
|
default:
|
|
return ORTE_NODE_STATE_UNKNOWN;
|
|
}
|
|
#endif
|
|
}
|
|
|
|
|
|
/**
 * Count the process slots available on a node.
 *
 * The NODELIST environment variable is split on ',' and the number of
 * entries that compare equal to node_name is returned. The split result
 * is kept in a function-local static, so once a non-NULL list has been
 * obtained the environment is not parsed again.
 *
 * NOTE(review): getenv("NODELIST") may be NULL; this relies on
 * opal_argv_split() accepting a NULL string -- confirm.
 *
 * @param node_name  node name to look for (compared with strcmp)
 * @return           number of matching NODELIST entries, 0 if none
 */
static size_t orte_ras_bjs_node_slots(char* node_name)
{
    static char **cached_entries = NULL;
    char **entry;
    size_t matches = 0;

    if (NULL == cached_entries) {
        cached_entries = opal_argv_split(getenv("NODELIST"), ',');
    }

    /* nothing to scan when no list could be built */
    if (NULL == cached_entries) {
        return 0;
    }

    for (entry = cached_entries; NULL != *entry; ++entry) {
        if (0 == strcmp(*entry, node_name)) {
            ++matches;
        }
    }
    return matches;
}
|
|
|
|
|
|
/**
|
|
* Resolve the node name to node number.
|
|
*/
|
|
|
|
static int orte_ras_bjs_node_resolve(char* node_name, int* node_num)
|
|
{
|
|
/* for now we expect this to be the node number */
|
|
if(NULL == node_name || sscanf(node_name, "%d", node_num) != 1)
|
|
return ORTE_NODE_ERROR;
|
|
return ORTE_SUCCESS;
|
|
}
|
|
|
|
|
|
/**
|
|
* Discover the available resources.
|
|
* - validate any nodes specified via hostfile/commandline
|
|
* - check for additional nodes that have already been allocated
|
|
*/
|
|
|
|
static int orte_ras_bjs_discover(
|
|
opal_list_t* nodelist,
|
|
orte_app_context_t** context,
|
|
size_t num_context)
|
|
{
|
|
char* nodes;
|
|
char* ptr;
|
|
opal_list_item_t* item;
|
|
opal_list_t new_nodes;
|
|
int rc;
|
|
|
|
/* query the nodelist from the registry */
|
|
OBJ_CONSTRUCT(&new_nodes, opal_list_t);
|
|
if(ORTE_SUCCESS != (rc = orte_ras_base_node_query(nodelist))) {
|
|
ORTE_ERROR_LOG(rc);
|
|
return rc;
|
|
}
|
|
|
|
/* validate that any user supplied nodes actually exist, etc. */
|
|
item = opal_list_get_first(nodelist);
|
|
while(item != opal_list_get_end(nodelist)) {
|
|
opal_list_item_t* next = opal_list_get_next(item);
|
|
int node_num;
|
|
|
|
orte_ras_node_t* node = (orte_ras_node_t*)item;
|
|
if(ORTE_SUCCESS != orte_ras_bjs_node_resolve(node->node_name, &node_num)) {
|
|
opal_list_remove_item(nodelist,item);
|
|
OBJ_DESTRUCT(item);
|
|
item = next;
|
|
continue;
|
|
}
|
|
|
|
if(orte_ras_bjs_node_state(node_num) != ORTE_NODE_STATE_UP) {
|
|
opal_list_remove_item(nodelist,item);
|
|
OBJ_DESTRUCT(item);
|
|
item = next;
|
|
continue;
|
|
}
|
|
|
|
if(bproc_access(node_num, BPROC_X_OK) != 0) {
|
|
opal_list_remove_item(nodelist,item);
|
|
OBJ_DESTRUCT(item);
|
|
item = next;
|
|
continue;
|
|
}
|
|
|
|
/* try and determine the number of available slots */
|
|
if(node->node_slots == 0) {
|
|
node->node_slots_inuse = 0;
|
|
node->node_slots_max = 0;
|
|
node->node_slots = orte_ras_bjs_node_slots(node->node_name);
|
|
}
|
|
item = next;
|
|
}
|
|
|
|
|
|
/* parse the node list and check node status/access */
|
|
nodes = getenv("NODES");
|
|
if(NULL == nodes) {
|
|
return opal_list_get_size(nodelist) ? ORTE_SUCCESS : ORTE_ERR_NOT_AVAILABLE;
|
|
}
|
|
|
|
OBJ_CONSTRUCT(&new_nodes, opal_list_t);
|
|
while(NULL != (ptr = strsep(&nodes,","))) {
|
|
orte_ras_node_t *node;
|
|
orte_node_state_t node_state;
|
|
int node_num;
|
|
|
|
/* is this node already in the list */
|
|
for(item = opal_list_get_first(nodelist);
|
|
item != opal_list_get_end(nodelist);
|
|
item = opal_list_get_next(item)) {
|
|
node = (orte_ras_node_t*)item;
|
|
if(strcmp(node->node_name, ptr) == 0)
|
|
break;
|
|
}
|
|
if(item != opal_list_get_end(nodelist))
|
|
continue;
|
|
if(sscanf(ptr, "%d", &node_num) != 1) {
|
|
continue;
|
|
}
|
|
|
|
if(ORTE_NODE_STATE_UP != (node_state = orte_ras_bjs_node_state(node_num))) {
|
|
opal_output(0, "error: a specified node (%d) is not up.\n", node_num);
|
|
rc = ORTE_NODE_DOWN;
|
|
goto cleanup;
|
|
}
|
|
if(bproc_access(node_num, BPROC_X_OK) != 0) {
|
|
opal_output(0, "error: a specified node (%d) is not accessible.\n", node_num);
|
|
rc = ORTE_NODE_ERROR;
|
|
goto cleanup;
|
|
}
|
|
|
|
/* create a new node entry */
|
|
node = OBJ_NEW(orte_ras_node_t);
|
|
node->node_name = strdup(ptr);
|
|
node->node_state = node_state;
|
|
node->node_cellid = 0;
|
|
node->node_slots_inuse = 0;
|
|
node->node_slots_max = 0;
|
|
node->node_slots = orte_ras_bjs_node_slots(node->node_name);
|
|
opal_list_append(&new_nodes, &node->super);
|
|
}
|
|
|
|
/* add any newly discovered nodes to the registry */
|
|
if(opal_list_get_size(&new_nodes)) {
|
|
rc = orte_ras_base_node_insert(&new_nodes);
|
|
if(ORTE_SUCCESS != rc) {
|
|
ORTE_ERROR_LOG(rc);
|
|
}
|
|
}
|
|
|
|
/* append them to the nodelist */
|
|
while(NULL != (item = opal_list_remove_first(&new_nodes)))
|
|
opal_list_append(nodelist, item);
|
|
|
|
cleanup:
|
|
OBJ_DESTRUCT(&new_nodes);
|
|
return rc;
|
|
}
|
|
|
|
|
|
/**
|
|
* Discover available (pre-allocated) nodes. Allocate the
|
|
* requested number of nodes/process slots to the job.
|
|
*
|
|
*/
|
|
|
|
static int orte_ras_bjs_allocate(orte_jobid_t jobid)
|
|
{
|
|
opal_list_t nodes;
|
|
opal_list_item_t* item;
|
|
int rc;
|
|
orte_app_context_t **context;
|
|
size_t i, num_context;
|
|
|
|
rc = orte_rmgr_base_get_app_context(jobid, &context, &num_context);
|
|
if(ORTE_SUCCESS != rc) {
|
|
ORTE_ERROR_LOG(rc);
|
|
return rc;
|
|
}
|
|
|
|
OBJ_CONSTRUCT(&nodes, opal_list_t);
|
|
if(ORTE_SUCCESS != (rc = orte_ras_bjs_discover(&nodes, context, num_context))) {
|
|
ORTE_ERROR_LOG(rc);
|
|
goto cleanup;
|
|
}
|
|
|
|
rc = orte_ras_base_allocate_nodes(jobid, &nodes);
|
|
if(ORTE_SUCCESS != rc) {
|
|
ORTE_ERROR_LOG(rc);
|
|
}
|
|
|
|
cleanup:
|
|
while(NULL != (item = opal_list_remove_first(&nodes)))
|
|
OBJ_RELEASE(item);
|
|
OBJ_DESTRUCT(&nodes);
|
|
for(i=0; i<num_context; i++)
|
|
OBJ_RELEASE(context[i]);
|
|
free(context);
|
|
return rc;
|
|
}
|
|
|
|
/**
 * Insert nodes: forwards the list unchanged to
 * orte_ras_base_node_insert() and returns its result.
 */
static int orte_ras_bjs_node_insert(opal_list_t *nodes)
{
    int status = orte_ras_base_node_insert(nodes);

    return status;
}
|
|
|
|
/**
 * Query nodes: forwards the list unchanged to
 * orte_ras_base_node_query() and returns its result.
 */
static int orte_ras_bjs_node_query(opal_list_t *nodes)
{
    int status = orte_ras_base_node_query(nodes);

    return status;
}
|
|
|
|
/**
 * Deallocate the resources of a job.
 *
 * Nothing is done here: the job id is ignored and ORTE_SUCCESS is
 * always returned.
 */
static int orte_ras_bjs_deallocate(orte_jobid_t jobid)
{
    (void)jobid;    /* intentionally unused */

    return ORTE_SUCCESS;
}
|
|
|
|
|
|
static int orte_ras_bjs_finalize(void)
|
|
{
|
|
return ORTE_SUCCESS;
|
|
}
|
|
|
|
|
|
orte_ras_base_module_t orte_ras_bjs_module = {
|
|
orte_ras_bjs_allocate,
|
|
orte_ras_bjs_node_insert,
|
|
orte_ras_bjs_node_query,
|
|
orte_ras_bjs_deallocate,
|
|
orte_ras_bjs_finalize
|
|
};
|
|
|