
Bring back the changes from /tmp/jjhursey-rmaps. Specific merge

command:

svn merge -r 7567:7663 https://svn.open-mpi.org/svn/ompi/tmp/jjhursey-rmaps .

(where "." is a trunk checkout)

The logs from this branch are much more descriptive than I will put
here (including a *really* long description from last night).  Here's
the short version:

- fixed some broken implementations in ras and rmaps
- "orterun --host ..." now works and has clearly defined semantics
  (this was the impetus for the branch and all these fixes -- LANL had
  a requirement for --host to work for 1.0)
- there is still a little bit of cleanup left to do post-1.0 (we got
  correct functionality for 1.0 -- we did not fix bad implementations
  that still "work")
  - rds/hostfile and ras/hostfile handshaking
  - singleton node segment assignments in stage1
  - remove the default hostfile (no need for it anymore with the
    localhost ras component)
  - clean up pls components to avoid duplicate ras mapping queries
  - [possible] -bynode/-byslot being specific to a single app context 
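
For reference, a minimal usage sketch of the newly-working option (the
node names and process count below are illustrative assumptions, not
part of this commit):

orterun --host node1,node2 -np 4 ./a.out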

This commit was SVN r7664.
This commit is contained in:
Jeff Squyres 2005-10-07 22:24:52 +00:00
parent fb19cc4177
commit 0629cdc2d7
49 changed files with 1944 additions and 1085 deletions


@ -1 +1,30 @@
localhost
#
# Copyright (c) 2004-2005 The Trustees of Indiana University.
# All rights reserved.
# Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
# All rights reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# This is the default hostfile for Open MPI. Notice that it does not
# contain any hosts (not even localhost). This file should only
# contain hosts if a system administrator wants users to always have
# the same set of default hosts, and is not using a batch scheduler
# (such as SLURM, PBS, etc.).
#
# If you are primarily interested in running Open MPI on one node, you
# should *not* simply list "localhost" in here (contrary to prior MPI
# implementations, such as LAM/MPI). A localhost-only node list is
# created by the RAS component named "localhost" if no other RAS
# components were able to find any hosts to run on (this behavior can
# be disabled by excluding the localhost RAS component by specifying
# the value "^localhost" [without the quotes] to the "ras" MCA
# parameter).
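#
# As an illustrative sketch only (not part of this file; standard Open
# MPI MCA command-line syntax assumed), the localhost RAS component can
# be excluded at run time with:
#
#   orterun --mca ras ^localhost -np 2 ./a.out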


@ -97,7 +97,7 @@ static inline int __poe_argv_append_int(int *argc, char ***argv, int varname, in
*/
int pls_poe_launch_interactive_orted(orte_jobid_t jobid)
{
opal_list_t nodes;
opal_list_t nodes, mapping_list;
opal_list_item_t* item;
size_t num_nodes;
orte_vpid_t vpid;
@ -114,10 +114,12 @@ int pls_poe_launch_interactive_orted(orte_jobid_t jobid)
int i;
int status;
FILE *hfp, *cfp;
/* query the list of nodes allocated to the job - don't need the entire
* mapping - as the daemon/proxy is responsibe for determining the apps
* to launch on each node.
/* Query the list of nodes allocated and mapped to this job.
* We need the entire mapping for a couple of reasons:
* - need the prefix to start with.
* - need to know if we are launching on a subset of the allocated nodes
* All other mapping responsibilities fall to orted in the fork PLS
*/
if (mca_pls_poe_component.verbose > 10) opal_output(0, "%s:--- BEGIN ---\n", __FUNCTION__);
@ -127,7 +129,8 @@ int pls_poe_launch_interactive_orted(orte_jobid_t jobid)
if((cfp=fopen(mca_pls_poe_component.cmdfile,"w"))==NULL) return ORTE_ERR_OUT_OF_RESOURCE;
OBJ_CONSTRUCT(&nodes, opal_list_t);
rc = orte_ras_base_node_query_alloc(&nodes, jobid);
OBJ_CONSTRUCT(&mapping_list, opal_list_t);
rc = orte_rmaps_base_mapped_node_query(&mapping_list, &nodes, jobid);
if(ORTE_SUCCESS != rc) {
goto cleanup;
}
@ -290,6 +293,12 @@ cleanup:
OBJ_RELEASE(item);
}
OBJ_DESTRUCT(&nodes);
while(NULL != (item = opal_list_remove_first(&mapping_list))) {
OBJ_RELEASE(item);
}
OBJ_DESTRUCT(&mapping_list);
if (mca_pls_poe_component.verbose > 10) opal_output(0, "%s: --- END rc(%d) ---\n", __FUNCTION__, rc);
return rc;
}
@ -446,7 +455,7 @@ __poe_launch_interactive - launch an interactive job
*/
static inline int __poe_launch_interactive(orte_jobid_t jobid)
{
opal_list_t map, nodes;
opal_list_t map, nodes, mapping_list;
opal_list_item_t* item;
orte_vpid_t vpid_start, vpid_range;
size_t num_nodes, num_procs;
@ -468,7 +477,8 @@ static inline int __poe_launch_interactive(orte_jobid_t jobid)
mca_pls_poe_component.jobid = jobid;
OBJ_CONSTRUCT(&nodes, opal_list_t);
rc = orte_ras_base_node_query_alloc(&nodes, jobid);
OBJ_CONSTRUCT(&mapping_list, opal_list_t);
rc = orte_rmaps_base_mapped_node_query(&mapping_list, &nodes, jobid);
if(ORTE_SUCCESS != rc) { goto cleanup; }
num_nodes = opal_list_get_size(&nodes);
@ -582,6 +592,11 @@ cleanup:
}
OBJ_DESTRUCT(&nodes);
while(NULL != (item = opal_list_remove_first(&mapping_list))) {
OBJ_RELEASE(item);
}
OBJ_DESTRUCT(&mapping_list);
if(mca_pls_poe_component.verbose>10) {
opal_output(0, "%s: --- END rc(%d) ---\n", __FUNCTION__, rc);
}
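
Condensed for reference, the pattern that each PLS launcher above and
below now follows: a sketch assembled from the diff hunks in this
commit, with abbreviated error handling, the launch step elided, and a
hypothetical function name (pls_launch_sketch):

static int pls_launch_sketch(orte_jobid_t jobid)
{
    opal_list_t nodes, mapping_list;
    opal_list_item_t *item;
    int rc;

    /* query both the allocated nodes and the full mapping for this job */
    OBJ_CONSTRUCT(&nodes, opal_list_t);
    OBJ_CONSTRUCT(&mapping_list, opal_list_t);
    rc = orte_rmaps_base_mapped_node_query(&mapping_list, &nodes, jobid);
    if (ORTE_SUCCESS != rc) {
        goto cleanup;
    }

    /* ... use "nodes" (plus the prefix / subset information carried in
       "mapping_list") to launch a daemon on each node ... */

cleanup:
    /* release both lists, as each launcher now does */
    while (NULL != (item = opal_list_remove_first(&nodes))) {
        OBJ_RELEASE(item);
    }
    OBJ_DESTRUCT(&nodes);
    while (NULL != (item = opal_list_remove_first(&mapping_list))) {
        OBJ_RELEASE(item);
    }
    OBJ_DESTRUCT(&mapping_list);
    return rc;
}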


@ -375,7 +375,7 @@ static void orte_pls_rsh_wait_daemon(pid_t pid, int status, void* cbdata)
int orte_pls_rsh_launch(orte_jobid_t jobid)
{
opal_list_t nodes;
opal_list_t nodes, mapping_list;
opal_list_item_t* item;
size_t num_nodes;
orte_vpid_t vpid;
@ -394,13 +394,15 @@ int orte_pls_rsh_launch(orte_jobid_t jobid)
bool remote_bash = false, remote_csh = false;
bool local_bash = false, local_csh = false;
/* query the list of nodes allocated to the job - don't need the entire
* mapping - as the daemon/proxy is responsibe for determining the apps
* to launch on each node.
/* Query the list of nodes allocated and mapped to this job.
* We need the entire mapping for a couple of reasons:
* - need the prefix to start with.
* - need to know if we are launching on a subset of the allocated nodes
* All other mapping responsibilities fall to orted in the fork PLS
*/
OBJ_CONSTRUCT(&nodes, opal_list_t);
rc = orte_ras_base_node_query_alloc(&nodes, jobid);
OBJ_CONSTRUCT(&mapping_list, opal_list_t);
rc = orte_rmaps_base_mapped_node_query(&mapping_list, &nodes, jobid);
if (ORTE_SUCCESS != rc) {
goto cleanup;
}
@ -944,8 +946,15 @@ cleanup:
OBJ_RELEASE(item);
}
OBJ_DESTRUCT(&nodes);
while (NULL != (item = opal_list_remove_first(&mapping_list))) {
OBJ_RELEASE(item);
}
OBJ_DESTRUCT(&mapping_list);
free(jobid_string); /* done with this variable */
opal_argv_free(argv);
return rc;
}


@ -81,7 +81,7 @@ extern char **environ;
static int pls_slurm_launch(orte_jobid_t jobid)
{
opal_list_t nodes;
opal_list_t nodes, mapping_list;
opal_list_item_t *item, *item2;
size_t num_nodes;
orte_vpid_t vpid;
@ -102,13 +102,15 @@ static int pls_slurm_launch(orte_jobid_t jobid)
int num_args, i;
char *cur_prefix;
/* query the list of nodes allocated to the job - don't need the entire
* mapping - as the daemon/proxy is responsibe for determining the apps
* to launch on each node.
/* Query the list of nodes allocated and mapped to this job.
* We need the entire mapping for a couple of reasons:
* - need the prefix to start with.
* - need to know if we are launching on a subset of the allocated nodes
* All other mapping responsibilities fall to orted in the fork PLS
*/
OBJ_CONSTRUCT(&nodes, opal_list_t);
rc = orte_ras_base_node_query_alloc(&nodes, jobid);
OBJ_CONSTRUCT(&mapping_list, opal_list_t);
rc = orte_rmaps_base_mapped_node_query(&mapping_list, &nodes, jobid);
if (ORTE_SUCCESS != rc) {
goto cleanup;
}
@ -379,6 +381,11 @@ cleanup:
OBJ_RELEASE(item);
}
OBJ_DESTRUCT(&nodes);
while (NULL != (item = opal_list_remove_first(&mapping_list))) {
OBJ_RELEASE(item);
}
OBJ_DESTRUCT(&mapping_list);
return rc;
}


@ -89,7 +89,7 @@ extern char **environ;
static int
pls_tm_launch(orte_jobid_t jobid)
{
opal_list_t nodes;
opal_list_t nodes, mapping_list;
opal_list_item_t *item, *item2;
size_t num_nodes;
orte_vpid_t vpid;
@ -104,13 +104,15 @@ pls_tm_launch(orte_jobid_t jobid)
opal_list_t map;
char *cur_prefix;
/* query the list of nodes allocated to the job - don't need the entire
* mapping - as the daemon/proxy is responsibe for determining the apps
* to launch on each node.
/* Query the list of nodes allocated and mapped to this job.
* We need the entire mapping for a couple of reasons:
* - need the prefix to start with.
* - need to know if we are launching on a subset of the allocated nodes
* All other mapping responsibilities fall to orted in the fork PLS
*/
OBJ_CONSTRUCT(&nodes, opal_list_t);
rc = orte_ras_base_node_query_alloc(&nodes, jobid);
OBJ_CONSTRUCT(&mapping_list, opal_list_t);
rc = orte_rmaps_base_mapped_node_query(&mapping_list, &nodes, jobid);
if (ORTE_SUCCESS != rc) {
goto cleanup;
}
@ -391,6 +393,12 @@ cleanup:
OBJ_RELEASE(item);
}
OBJ_DESTRUCT(&nodes);
while (NULL != (item = opal_list_remove_first(&mapping_list))) {
OBJ_RELEASE(item);
}
OBJ_DESTRUCT(&mapping_list);
return rc;
}


@ -237,7 +237,7 @@ mca_pls_xgrid_set_node_name(orte_ras_node_t* node,
-(int) launchJob:(orte_jobid_t) jobid
{
opal_list_t nodes;
opal_list_t nodes, mapping_list;
opal_list_item_t *item;
int ret;
size_t num_nodes;
@ -250,7 +250,8 @@ mca_pls_xgrid_set_node_name(orte_ras_node_t* node,
/* query the list of nodes allocated to the job */
OBJ_CONSTRUCT(&nodes, opal_list_t);
ret = orte_ras_base_node_query_alloc(&nodes, jobid);
OBJ_CONSTRUCT(&mapping_list, opal_list_t);
rc = orte_rmaps_base_mapped_node_query(&mapping_list, &nodes, jobid);
if (ORTE_SUCCESS != ret) goto cleanup;
/* allocate vpids for the daemons */
@ -356,6 +357,12 @@ cleanup:
OBJ_RELEASE(item);
}
OBJ_DESTRUCT(&nodes);
while(NULL != (item = opal_list_remove_first(&mapping_list))) {
OBJ_RELEASE(item);
}
OBJ_DESTRUCT(&mapping_list);
return ret;
}


@ -14,8 +14,6 @@
# $HEADER$
#
noinst_LTLIBRARIES = libmca_ras_base.la
# Source code files
@ -28,12 +26,12 @@ headers = \
libmca_ras_base_la_SOURCES = \
$(headers) \
ras_base_alloc.c \
ras_base_allocate.c \
ras_base_close.c \
ras_base_find_available.c \
ras_base_node.h \
ras_base_node.c \
ras_base_open.c \
ras_base_select.c
ras_base_open.c
# Conditionally install the header files


@ -56,15 +56,14 @@ typedef struct orte_ras_base_cmp_t orte_ras_base_cmp_t;
* function definitions
*/
ORTE_DECLSPEC int orte_ras_base_open(void);
ORTE_DECLSPEC int orte_ras_base_find_available(void);
ORTE_DECLSPEC int orte_ras_base_allocate(orte_jobid_t job,
orte_ras_base_module_t **m);
ORTE_DECLSPEC int orte_ras_base_finalize(void);
ORTE_DECLSPEC int orte_ras_base_close(void);
ORTE_DECLSPEC orte_ras_base_module_t* orte_ras_base_select(const char*);
ORTE_DECLSPEC int orte_ras_base_allocate(orte_jobid_t job);
ORTE_DECLSPEC int orte_ras_base_deallocate(orte_jobid_t job);
ORTE_DECLSPEC int orte_ras_base_allocate_nodes_by_node(orte_jobid_t jobid,
opal_list_t* nodes);
ORTE_DECLSPEC int orte_ras_base_allocate_nodes_by_slot(orte_jobid_t jobid,
opal_list_t* nodes);
ORTE_DECLSPEC int orte_ras_base_allocate_nodes(orte_jobid_t jobid,
opal_list_t* nodes);
/*
* globals that might be needed
@ -74,7 +73,9 @@ ORTE_DECLSPEC int orte_ras_base_allocate_nodes_by_slot(orte_jobid_t jobid,
typedef struct orte_ras_base_t {
int ras_output;
opal_list_t ras_opened;
bool ras_opened_valid;
opal_list_t ras_available;
bool ras_available_valid;
size_t ras_num_nodes;
} orte_ras_base_t;


@ -25,209 +25,28 @@
#include "mca/rmgr/base/base.h"
#include "mca/errmgr/errmgr.h"
/*
* Allocate one process per node on a round-robin basis, looping back
* around to the beginning as necessary
* Mark nodes as allocated on the registry
*/
int orte_ras_base_allocate_nodes_by_node(orte_jobid_t jobid,
opal_list_t* nodes)
int orte_ras_base_allocate_nodes(orte_jobid_t jobid,
opal_list_t* nodes)
{
opal_list_t allocated;
opal_list_item_t* item;
size_t num_requested = 0;
size_t num_allocated = 0;
size_t num_constrained = 0;
size_t slots;
bool oversubscribe = false;
int rc;
/* query for the number of process slots required */
if (ORTE_SUCCESS !=
(rc = orte_rmgr_base_get_job_slots(jobid, &num_requested))) {
return rc;
}
OBJ_CONSTRUCT(&allocated, opal_list_t);
num_allocated = 0;
/* This loop continues until all procs have been allocated or we run
out of resources. There are two definitions of "run out of
resources":
1. All nodes have node_slots processes allocated to them
2. All nodes have node_slots_max processes allocated to them
We first map until condition #1 is met. If there are still
processes that haven't been allocated yet, then we continue
until condition #2 is met. If we still have processes that
haven't been allocated yet, then it's an "out of resources"
error. */
while (num_allocated < num_requested) {
num_constrained = 0;
/* loop over all nodes until either all processes are
allocated or they all become constrained */
for (item = opal_list_get_first(nodes);
item != opal_list_get_end(nodes) && num_allocated < num_requested;
item = opal_list_get_next(item)) {
orte_ras_node_t* node = (orte_ras_node_t*)item;
/* are any slots available? */
slots = (oversubscribe ? node->node_slots_max : node->node_slots);
if (node->node_slots_inuse < slots ||
(oversubscribe && 0 == slots)) {
++num_allocated;
++node->node_slots_inuse; /* running total */
++node->node_slots_alloc; /* this job */
} else {
++num_constrained;
}
}
/* if all nodes are constrained:
- if this is the first time through the loop, then set
"oversubscribe" to true, and we'll now start obeying
node_slots_max instead of node_slots
- if this is the second time through the loop, then all
nodes are full to the max, and therefore we can't do
anything more -- we're out of resources */
if (opal_list_get_size(nodes) == num_constrained) {
if (oversubscribe) {
rc = ORTE_ERR_OUT_OF_RESOURCE;
goto cleanup;
} else {
oversubscribe = true;
}
}
}
/* move all nodes w/ allocations to the allocated list */
item = opal_list_get_first(nodes);
while(item != opal_list_get_end(nodes)) {
/* Increment the allocation field on each node so we know that
* it has been allocated to us. No further logic needed, that is left to rmaps */
for (item = opal_list_get_first(nodes);
item != opal_list_get_end(nodes);
item = opal_list_get_next(item)) {
orte_ras_node_t* node = (orte_ras_node_t*)item;
opal_list_item_t* next = opal_list_get_next(item);
if(node->node_slots_alloc) {
opal_list_remove_item(nodes, item);
opal_list_append(&allocated, item);
}
item = next;
node->node_slots_alloc++;
}
rc = orte_ras_base_node_assign(&allocated, jobid);
if(ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
}
cleanup:
while(NULL != (item = opal_list_remove_first(&allocated)))
opal_list_append(nodes, item);
OBJ_DESTRUCT(&allocated);
return rc;
}
/*
* Allocate processes to nodes, using all available slots on a node.
*/
int orte_ras_base_allocate_nodes_by_slot(orte_jobid_t jobid,
opal_list_t* nodes)
{
opal_list_t allocated;
opal_list_item_t* item;
size_t num_requested = 0;
size_t num_allocated = 0;
size_t num_constrained = 0;
size_t available;
int rc;
/* query for the number of process slots required */
if (ORTE_SUCCESS !=
(rc = orte_rmgr_base_get_job_slots(jobid, &num_requested))) {
return rc;
}
OBJ_CONSTRUCT(&allocated, opal_list_t);
num_allocated = 0;
/* In the first pass, just grab all available slots (i.e., stay <=
node_slots) greedily off each node */
for (item = opal_list_get_first(nodes);
item != opal_list_get_end(nodes) && num_allocated < num_requested;
item = opal_list_get_next(item)) {
orte_ras_node_t* node = (orte_ras_node_t*)item;
/* are any slots available? */
if (node->node_slots_inuse < node->node_slots) {
available = node->node_slots - node->node_slots_inuse;
if (num_requested - num_allocated < available) {
node->node_slots_inuse +=
(num_requested - num_allocated); /* running total */
node->node_slots_alloc +=
(num_requested - num_allocated); /* this job */
num_allocated = num_requested;
} else {
num_allocated += available;
node->node_slots_inuse += available; /* running total */
node->node_slots_alloc += available; /* this job */
}
}
}
/* If we're not done, then we're in an oversubscribing situation.
Switch to a round-robin-by-node policy -- take one slot from
each node until we hit node_slots_max or we have no more
resources; whichever occurs first. */
while (num_allocated < num_requested) {
num_constrained = 0;
/* loop over all nodes until either all processes are
allocated or they all become constrained */
for (item = opal_list_get_first(nodes);
item != opal_list_get_end(nodes) && num_allocated < num_requested;
item = opal_list_get_next(item)) {
orte_ras_node_t* node = (orte_ras_node_t*)item;
/* are any slots available? */
if (node->node_slots_inuse < node->node_slots_max ||
0 == node->node_slots_max) {
++num_allocated;
++node->node_slots_inuse; /* running total */
++node->node_slots_alloc; /* this job */
} else {
++num_constrained;
}
}
/* if all nodes are constrained, then we're out of resources
-- thanks for playing */
if (opal_list_get_size(nodes) == num_constrained) {
rc = ORTE_ERR_OUT_OF_RESOURCE;
goto cleanup;
}
}
/* move all nodes w/ allocations to the allocated list */
item = opal_list_get_first(nodes);
while(item != opal_list_get_end(nodes)) {
orte_ras_node_t* node = (orte_ras_node_t*)item;
opal_list_item_t* next = opal_list_get_next(item);
if(node->node_slots_alloc) {
opal_list_remove_item(nodes, item);
opal_list_append(&allocated, item);
}
item = next;
}
rc = orte_ras_base_node_assign(&allocated, jobid);
rc = orte_ras_base_node_assign(nodes, jobid);
if(ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
}
cleanup:
while(NULL != (item = opal_list_remove_first(&allocated)))
opal_list_append(nodes, item);
OBJ_DESTRUCT(&allocated);
return rc;
}

orte/mca/ras/base/ras_base_allocate.c (new file, 93 lines)

@ -0,0 +1,93 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "opal/mca/mca.h"
#include "opal/mca/base/base.h"
#include "opal/util/output.h"
#include "orte/include/orte_constants.h"
#include "orte/mca/ras/base/base.h"
#include "orte/mca/ras/base/ras_base_node.h"
#include "orte/mca/errmgr/errmgr.h"
/*
* Function for selecting one component from all those that are
* available.
*/
int orte_ras_base_allocate(orte_jobid_t jobid,
orte_ras_base_module_t **module)
{
int ret;
opal_list_item_t *item;
orte_ras_base_cmp_t *cmp;
/* If the list is empty, return NULL */
if (opal_list_is_empty(&orte_ras_base.ras_available)) {
opal_output(orte_ras_base.ras_output,
"orte:ras:base:select: no components available!");
ret = ORTE_ERR_NOT_FOUND;
ORTE_ERROR_LOG(ret);
return ret;
}
/* Otherwise, go through the [already sorted in priority order]
list and initialize them until one of them puts something on
the node segment */
for (item = opal_list_get_first(&orte_ras_base.ras_available);
item != opal_list_get_end(&orte_ras_base.ras_available);
item = opal_list_get_next(item)) {
cmp = (orte_ras_base_cmp_t *) item;
opal_output(orte_ras_base.ras_output,
"orte:ras:base:allocate: attemping to allocate using module: %s",
cmp->component->ras_version.mca_component_name);
if (NULL != cmp->module->allocate) {
ret = cmp->module->allocate(jobid);
if (ORTE_SUCCESS == ret) {
bool empty;
if (ORTE_SUCCESS !=
(ret = orte_ras_base_node_segment_empty(&empty))) {
ORTE_ERROR_LOG(ret);
return ret;
}
/* If this module put something on the node segment,
we're done */
if (!empty) {
opal_output(orte_ras_base.ras_output,
"orte:ras:base:allocate: found good module: %s",
cmp->component->ras_version.mca_component_name);
*module = cmp->module;
return ORTE_SUCCESS;
}
}
}
}
/* We didn't find anyone who put anything on the node segment */
opal_output(orte_ras_base.ras_output,
"orte:ras:base:allocate: no module put anything in the node segment");
ret = ORTE_ERR_NOT_FOUND;
ORTE_ERROR_LOG(ret);
return ret;
}
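
For orientation, a caller-side sketch (an assumption for illustration,
not part of this commit) of how the new entry point might be driven;
the prototype orte_ras_base_allocate(orte_jobid_t, orte_ras_base_module_t **)
comes from the base.h diff earlier in this commit, and the function
name ras_allocate_caller_sketch is hypothetical:

static void ras_allocate_caller_sketch(orte_jobid_t jobid)
{
    orte_ras_base_module_t *active_ras = NULL;
    int rc;

    /* walk the prioritized component list until one of them puts
       something on the node segment */
    rc = orte_ras_base_allocate(jobid, &active_ras);
    if (ORTE_SUCCESS != rc || NULL == active_ras) {
        /* no RAS component produced an allocation for this job */
        return;
    }
    /* active_ras now points at the module whose allocate() call
       populated the node segment */
}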


@ -20,10 +20,9 @@
#include "include/orte_constants.h"
#include "mca/mca.h"
#include "mca/base/base.h"
#include "mca/ras/base/base.h"
#include "opal/mca/mca.h"
#include "opal/mca/base/base.h"
#include "orte/mca/ras/base/base.h"
int orte_ras_base_finalize(void)
@ -31,14 +30,20 @@ int orte_ras_base_finalize(void)
opal_list_item_t* item;
/* Finalize all available modules */
while((item = opal_list_remove_first(&orte_ras_base.ras_available)) != NULL) {
orte_ras_base_cmp_t* cmp = (orte_ras_base_cmp_t*)item;
cmp->module->finalize();
OBJ_RELEASE(cmp);
if (orte_ras_base.ras_available_valid) {
while (NULL !=
(item = opal_list_remove_first(&orte_ras_base.ras_available))) {
orte_ras_base_cmp_t* cmp = (orte_ras_base_cmp_t*)item;
cmp->module->finalize();
OBJ_RELEASE(cmp);
}
OBJ_DESTRUCT(&orte_ras_base.ras_available);
}
return OMPI_SUCCESS;
return ORTE_SUCCESS;
}
int orte_ras_base_close(void)
{
/* Close all remaining available components (may be one if this is a
@ -47,7 +52,6 @@ int orte_ras_base_close(void)
mca_base_components_close(orte_ras_base.ras_output,
&orte_ras_base.ras_opened, NULL);
OBJ_DESTRUCT(&orte_ras_base.ras_available);
return ORTE_SUCCESS;
}

orte/mca/ras/base/ras_base_find_available.c (new file, 127 lines)

@ -0,0 +1,127 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "include/orte_constants.h"
#include "mca/mca.h"
#include "mca/base/base.h"
#include "opal/util/output.h"
#include "mca/ras/base/base.h"
/*
* Local functions
*/
static void orte_ras_base_cmp_constructor(orte_ras_base_cmp_t *cmp);
static int compare(opal_list_item_t **a, opal_list_item_t **b);
/*
* Global variables
*/
OBJ_CLASS_INSTANCE(orte_ras_base_cmp_t,
opal_list_item_t,
orte_ras_base_cmp_constructor,
NULL);
/*
* Find all available RAS components and sort them according to
* priority
*/
int orte_ras_base_find_available(void)
{
opal_list_item_t *item;
mca_base_component_list_item_t *cli;
orte_ras_base_component_t *component;
orte_ras_base_module_t *module;
int priority;
orte_ras_base_cmp_t *cmp;
OBJ_CONSTRUCT(&orte_ras_base.ras_available, opal_list_t);
orte_ras_base.ras_available_valid = true;
for (item = opal_list_get_first(&orte_ras_base.ras_opened);
opal_list_get_end(&orte_ras_base.ras_opened) != item;
item = opal_list_get_next(item)) {
cli = (mca_base_component_list_item_t *) item;
component = (orte_ras_base_component_t *) cli->cli_component;
opal_output(orte_ras_base.ras_output,
"orte:ras:base:open: querying component %s",
component->ras_version.mca_component_name);
/* Call the component's init function and see if it wants to be
selected */
module = component->ras_init(&priority);
/* If we got a non-NULL module back, then the component wants
to be considered for selection */
if (NULL != module) {
opal_output(orte_ras_base.ras_output,
"orte:ras:base:open: component %s returns priority %d",
component->ras_version.mca_component_name,
priority);
cmp = OBJ_NEW(orte_ras_base_cmp_t);
cmp->component = component;
cmp->module = module;
cmp->priority = priority;
opal_list_append(&orte_ras_base.ras_available, &cmp->super);
} else {
opal_output(orte_ras_base.ras_output,
"orte:ras:base:open: component %s does NOT want to be considered for selection",
component->ras_version.mca_component_name);
}
}
/* Sort the resulting available list in priority order */
opal_list_sort(&orte_ras_base.ras_available, compare);
return ORTE_SUCCESS;
}
static void orte_ras_base_cmp_constructor(orte_ras_base_cmp_t *cmp)
{
cmp->component = NULL;
cmp->module = NULL;
cmp->priority = -1;
}
/*
* Need to make this an *opposite* compare (this is invoked by qsort)
* so that we get the highest priority first (i.e., so the sort is
* highest->lowest, not lowest->highest)
*/
static int compare(opal_list_item_t **a, opal_list_item_t **b)
{
orte_ras_base_cmp_t *aa = *((orte_ras_base_cmp_t **) a);
orte_ras_base_cmp_t *bb = *((orte_ras_base_cmp_t **) b);
if (bb->priority > aa->priority) {
return 1;
} else if (bb->priority == aa->priority) {
return 0;
} else {
return -1;
}
}


@ -134,137 +134,6 @@ int orte_ras_base_node_query(opal_list_t* nodes)
return ORTE_SUCCESS;
}
/*
* Query the registry for all available nodes
*/
int orte_ras_base_node_query_context(
opal_list_t* nodes,
orte_app_context_t** context,
size_t num_context,
bool *constrained)
{
size_t i, cnt;
orte_gpr_value_t** values;
int rc;
opal_list_t required;
opal_list_item_t* item;
/* expand the list of node/host specifications on the context structure into
* a list of node names
*/
*constrained = false;
OBJ_CONSTRUCT(&required, opal_list_t);
for(i=0; i<num_context; i++) {
orte_app_context_map_t** map = context[i]->map_data;
size_t m, num_map = context[i]->num_map;
for(m=0; m<num_map; m++) {
if(map[m]->map_type == ORTE_APP_CONTEXT_MAP_HOSTNAME) {
char** hosts = opal_argv_split(map[m]->map_data, ',');
char** ptr = hosts;
while(*ptr) {
orte_ras_node_t* node = OBJ_NEW(orte_ras_node_t);
node->node_name = strdup(*ptr);
opal_list_append(&required, (opal_list_item_t*)node);
ptr++;
}
opal_argv_free(hosts);
*constrained = true;
}
}
}
/* query all node entries */
rc = orte_gpr.get(
ORTE_GPR_KEYS_OR|ORTE_GPR_TOKENS_OR,
ORTE_NODE_SEGMENT,
NULL,
NULL,
&cnt,
&values);
if(ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* parse the response */
for(i=0; i<cnt; i++) {
orte_gpr_value_t* value = values[i];
orte_ras_node_t* node = OBJ_NEW(orte_ras_node_t);
size_t k;
bool found = false;
for(k=0; k<value->cnt; k++) {
orte_gpr_keyval_t* keyval = value->keyvals[k];
if(strcmp(keyval->key, ORTE_NODE_NAME_KEY) == 0) {
node->node_name = strdup(keyval->value.strptr);
continue;
}
if(strcmp(keyval->key, ORTE_NODE_ARCH_KEY) == 0) {
node->node_arch = strdup(keyval->value.strptr);
continue;
}
if(strcmp(keyval->key, ORTE_NODE_STATE_KEY) == 0) {
node->node_state = keyval->value.node_state;
continue;
}
if(strcmp(keyval->key, ORTE_NODE_SLOTS_KEY) == 0) {
node->node_slots = keyval->value.size;
continue;
}
if(strncmp(keyval->key, ORTE_NODE_SLOTS_ALLOC_KEY, strlen(ORTE_NODE_SLOTS_ALLOC_KEY)) == 0) {
node->node_slots_inuse += keyval->value.size;
continue;
}
if(strcmp(keyval->key, ORTE_NODE_SLOTS_MAX_KEY) == 0) {
node->node_slots_max = keyval->value.size;
continue;
}
if(strcmp(keyval->key, ORTE_NODE_USERNAME_KEY) == 0) {
node->node_username = strdup(keyval->value.strptr);
continue;
}
if(strcmp(keyval->key, ORTE_CELLID_KEY) == 0) {
node->node_cellid = keyval->value.cellid;
continue;
}
}
/* contained in app_context? */
for(item = opal_list_get_first(&required);
item != opal_list_get_end(&required);
item = opal_list_get_next(item)) {
if(0 == strcmp(((orte_ras_node_t*)item)->node_name,node->node_name)) {
opal_list_remove_item(&required, item);
OBJ_RELEASE(item);
found = true;
break;
}
}
if(*constrained == false || found) {
opal_list_append(nodes, &node->super);
} else {
OBJ_RELEASE(node);
}
OBJ_RELEASE(value);
}
/* append any remaining specified nodes to the list with
* with default settings for slots,etc.
*/
if(opal_list_get_size(&required)) {
if(ORTE_SUCCESS != (rc = orte_ras_base_node_insert(&required))) {
ORTE_ERROR_LOG(rc);
return rc;
}
while(NULL != (item = opal_list_remove_first(&required))) {
opal_list_append(nodes, item);
}
}
OBJ_DESTRUCT(&required);
if (NULL != values) free(values);
return ORTE_SUCCESS;
}
/*
* Query the registry for all nodes allocated to a specified job
@ -665,3 +534,33 @@ int orte_ras_base_node_assign(opal_list_t* nodes, orte_jobid_t jobid)
return rc;
}
int orte_ras_base_node_segment_empty(bool *empty)
{
int ret;
opal_list_t nodes;
opal_list_item_t *item;
/* See what's already on the node segment */
OBJ_CONSTRUCT(&nodes, opal_list_t);
if (ORTE_SUCCESS != (ret = orte_ras_base_node_query(&nodes))) {
ORTE_ERROR_LOG(ret);
OBJ_DESTRUCT(&nodes);
return ret;
}
*empty = opal_list_is_empty(&nodes) ? true : false;
/* Free the list */
while (NULL != (item = opal_list_remove_first(&nodes))) {
OBJ_RELEASE(item);
}
OBJ_DESTRUCT(&nodes);
/* All done */
return ORTE_SUCCESS;
}


@ -13,10 +13,8 @@
*
* $HEADER$
*/
/**
* @file
*
*/
/** @file */
#ifndef ORTE_RAS_BASE_NODE_H
#define ORTE_RAS_BASE_NODE_H
@ -24,47 +22,42 @@
#include "mca/soh/soh_types.h"
#include "mca/rmgr/rmgr_types.h"
#include "mca/ras/ras.h"
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
/*
* Convience routines to query/set node state in the registry
*/
/*
* Query the registry for all available nodes
*/
int orte_ras_base_node_query(opal_list_t*);
/*
* Query the registry for all available nodes that satisfy any
* constraints specified on the app_context(s)
*/
int orte_ras_base_node_query_context(opal_list_t*, orte_app_context_t**, size_t num_context, bool* constrained);
/*
/**
* Query the registry for all nodes allocated to a specific job
*/
int orte_ras_base_node_query_alloc(opal_list_t*, orte_jobid_t);
/*
/**
* Add the specified node definitions to the registry
*/
int orte_ras_base_node_insert(opal_list_t*);
/*
/**
* Delete the specified nodes from the registry
*/
int orte_ras_base_node_delete(opal_list_t*);
/*
/**
* Assign the allocated slots on the specified nodes to the
* indicated jobid.
*/
int orte_ras_base_node_assign(opal_list_t*, orte_jobid_t);
/**
* Check to see if the node segment is empty
*/
int orte_ras_base_node_segment_empty(bool *empty);
#if defined(c_plusplus) || defined(__cplusplus)
}


@ -33,51 +33,12 @@
#include "orte/mca/ras/base/static-components.h"
/**
* Local functions.
*/
static void orte_ras_base_cmp_constructor(orte_ras_base_cmp_t *cmp)
{
cmp->component = NULL;
cmp->module = NULL;
cmp->priority = -1;
}
static void orte_ras_base_cmp_destructor(orte_ras_base_cmp_t *cmp)
{
}
/*
* Need to make this an *opposite* compare (this is invoked by qsort)
* so that we get the highest priority first (i.e., so the sort is
* highest->lowest, not lowest->highest)
*/
static int compare(opal_list_item_t **a, opal_list_item_t **b)
{
orte_ras_base_cmp_t *aa = *((orte_ras_base_cmp_t **) a);
orte_ras_base_cmp_t *bb = *((orte_ras_base_cmp_t **) b);
if (bb->priority > aa->priority) {
return 1;
} else if (bb->priority == aa->priority) {
return 0;
} else {
return -1;
}
}
/*
* Global variables
*/
orte_ras_base_module_t orte_ras;
orte_ras_base_t orte_ras_base;
OBJ_CLASS_INSTANCE(
orte_ras_base_cmp_t,
opal_list_item_t,
orte_ras_base_cmp_constructor,
orte_ras_base_cmp_destructor);
/**
@ -86,32 +47,24 @@ OBJ_CLASS_INSTANCE(
*/
int orte_ras_base_open(void)
{
opal_list_item_t *item;
mca_base_component_list_item_t *cli;
orte_ras_base_component_t *component;
orte_ras_base_module_t *module;
int param, priority, value;
orte_ras_base_cmp_t *cmp;
char * policy;
int value;
/* Debugging / verbose output */
orte_ras_base.ras_output = opal_output_open(NULL);
param = mca_base_param_reg_int_name("ras_base", "verbose",
"Verbosity level for the ras framework",
false, false, 0, &value);
mca_base_param_reg_int_name("ras_base", "verbose",
"Enable debugging for the RAS framework (nonzero = enabled)",
false, false, 0, &value);
if (value != 0) {
orte_ras_base.ras_output = opal_output_open(NULL);
} else {
orte_ras_base.ras_output = -1;
}
param = mca_base_param_reg_string_name("ras_base", "schedule_policy",
"Scheduling Policy for RAS. [slot | node]",
false, false, "slot", &policy);
if (0 == strcmp(policy, "node")) {
mca_base_param_set_string(param, "node");
}
/* Defaults */
orte_ras_base.ras_opened_valid = false;
orte_ras_base.ras_available_valid = false;
/* Open up all available components */
@ -121,48 +74,10 @@ int orte_ras_base_open(void)
&orte_ras_base.ras_opened, true)) {
return ORTE_ERROR;
}
OBJ_CONSTRUCT(&orte_ras_base.ras_available, opal_list_t);
for (item = opal_list_get_first(&orte_ras_base.ras_opened);
opal_list_get_end(&orte_ras_base.ras_opened) != item;
item = opal_list_get_next(item)) {
cli = (mca_base_component_list_item_t *) item;
component = (orte_ras_base_component_t *) cli->cli_component;
opal_output(orte_ras_base.ras_output,
"orte:base:open: querying component %s",
component->ras_version.mca_component_name);
/* Call the component's init function and see if it wants to be
selected */
module = component->ras_init(&priority);
/* If we got a non-NULL module back, then the component wants
to be considered for selection */
if (NULL != module) {
opal_output(orte_ras_base.ras_output,
"orte:base:open: component %s returns priority %d",
component->ras_version.mca_component_name,
priority);
cmp = OBJ_NEW(orte_ras_base_cmp_t);
cmp->component = component;
cmp->module = module;
cmp->priority = priority;
opal_list_append(&orte_ras_base.ras_available, &cmp->super);
} else {
opal_output(orte_ras_base.ras_output,
"orte:base:open: component %s does NOT want to be considered for selection",
component->ras_version.mca_component_name);
}
}
/* Sort the resulting available list in priority order */
opal_list_sort(&orte_ras_base.ras_available, compare);
/* All done */
orte_ras_base.ras_opened_valid = true;
return ORTE_SUCCESS;
}


@ -1,102 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "include/orte_constants.h"
#include "mca/mca.h"
#include "mca/base/base.h"
#include "opal/util/output.h"
#include "mca/ras/base/base.h"
/*
* Local functions
*/
static orte_ras_base_module_t *select_preferred(const char *name);
static orte_ras_base_module_t *select_any(void);
/*
* Function for selecting one component from all those that are
* available.
*/
orte_ras_base_module_t* orte_ras_base_select(const char *preferred)
{
orte_ras_base_module_t *module;
if (NULL != preferred) {
module = select_preferred(preferred);
} else {
module = select_any();
}
orte_ras = *module;
return module;
}
static orte_ras_base_module_t *select_preferred(const char *name)
{
opal_list_item_t *item;
orte_ras_base_cmp_t *cmp;
/* Look for a matching selected name */
opal_output(orte_ras_base.ras_output,
"orte:base:select: looking for component %s", name);
for (item = opal_list_get_first(&orte_ras_base.ras_available);
item != opal_list_get_end(&orte_ras_base.ras_available);
item = opal_list_get_next(item)) {
cmp = (orte_ras_base_cmp_t *) item;
if (0 == strcmp(name,
cmp->component->ras_version.mca_component_name)) {
opal_output(orte_ras_base.ras_output,
"orte:base:select: found module for compoent %s", name);
return cmp->module;
}
}
/* Didn't find a matching name */
opal_output(orte_ras_base.ras_output,
"orte:base:select: did not find module for compoent %s", name);
return NULL;
}
static orte_ras_base_module_t *select_any(void)
{
opal_list_item_t *item;
orte_ras_base_cmp_t *cmp;
/* If the list is empty, return NULL */
if (opal_list_is_empty(&orte_ras_base.ras_available)) {
opal_output(orte_ras_base.ras_output,
"orte:base:select: no components available!");
return NULL;
}
/* Otherwise, return the first item (it's already sorted in
priority order) */
item = opal_list_get_first(&orte_ras_base.ras_available);
cmp = (orte_ras_base_cmp_t *) item;
opal_output(orte_ras_base.ras_output,
"orte:base:select: highest priority component: %s",
cmp->component->ras_version.mca_component_name);
return cmp->module;
}


@ -113,11 +113,10 @@ static int orte_ras_bjs_discover(
opal_list_item_t* item;
opal_list_t new_nodes;
int rc;
bool constrained;
/* query the nodelist from the registry */
OBJ_CONSTRUCT(&new_nodes, opal_list_t);
if(ORTE_SUCCESS != (rc = orte_ras_base_node_query_context(nodelist, context, num_context, &constrained))) {
if(ORTE_SUCCESS != (rc = orte_ras_base_node_query(nodelist))) {
ORTE_ERROR_LOG(rc);
return rc;
}
@ -158,10 +157,7 @@ static int orte_ras_bjs_discover(
}
item = next;
}
if(constrained) {
return ORTE_SUCCESS;
}
/* parse the node list and check node status/access */
nodes = getenv("NODES");
@ -254,11 +250,8 @@ static int orte_ras_bjs_allocate(orte_jobid_t jobid)
ORTE_ERROR_LOG(rc);
goto cleanup;
}
if (0 == strcmp(mca_ras_bjs_component.schedule_policy, "node")) {
rc = orte_ras_base_allocate_nodes_by_node(jobid, &nodes);
} else {
rc = orte_ras_base_allocate_nodes_by_slot(jobid, &nodes);
}
rc = orte_ras_base_allocate_nodes(jobid, &nodes);
if(ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
}

orte/mca/ras/dash_host/Makefile.am (new file, 45 lines)

@ -0,0 +1,45 @@
#
# Copyright (c) 2004-2005 The Trustees of Indiana University.
# All rights reserved.
# Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
# All rights reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).
if OMPI_BUILD_ras_dash_host_DSO
component_noinst =
component_install = mca_ras_dash_host.la
else
component_noinst = libmca_ras_dash_host.la
component_install =
endif
dash_host_SOURCES = \
ras_dash_host.h \
ras_dash_host_module.c \
ras_dash_host_component.c
mcacomponentdir = $(libdir)/openmpi
mcacomponent_LTLIBRARIES = $(component_install)
mca_ras_dash_host_la_SOURCES = $(dash_host_SOURCES)
mca_ras_dash_host_la_LIBADD = \
$(top_ompi_builddir)/orte/liborte.la \
$(top_ompi_builddir)/opal/libopal.la
mca_ras_dash_host_la_LDFLAGS = -module -avoid-version
noinst_LTLIBRARIES = $(component_noinst)
libmca_ras_dash_host_la_SOURCES = $(dash_host_SOURCES)
libmca_ras_dash_host_la_LIBADD =
libmca_ras_dash_host_la_LDFLAGS = -module -avoid-version

orte/mca/ras/dash_host/configure.params (new file, 21 lines)

@ -0,0 +1,21 @@
# -*- shell-script -*-
#
# Copyright (c) 2004-2005 The Trustees of Indiana University.
# All rights reserved.
# Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
# All rights reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# Specific to this module
PARAM_INIT_FILE=ras_dash_host_component.c
PARAM_CONFIG_FILES="Makefile"

orte/mca/ras/dash_host/ras_dash_host.h (new file, 57 lines)

@ -0,0 +1,57 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*
* Resource Allocation (dash_host)
*/
#ifndef ORTE_RAS_DASH_HOST_H
#define ORTE_RAS_DASH_HOST_H
#include "mca/ras/ras.h"
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
/**
* Dash_host-specific RAS component struct
*/
struct orte_ras_dash_host_component_t {
/** Base RAS component */
orte_ras_base_component_t super;
/** What's the priority of this component */
int priority;
};
/**
* Convenience typedef
*/
typedef struct orte_ras_dash_host_component_t orte_ras_dash_host_component_t;
/**
* Component export structure
*/
OMPI_COMP_EXPORT extern orte_ras_dash_host_component_t mca_ras_dash_host_component;
/**
* Module init function
*/
orte_ras_base_module_t *orte_ras_dash_host_init(int* priority);
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
#endif

orte/mca/ras/dash_host/ras_dash_host_component.c (new file, 75 lines)

@ -0,0 +1,75 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "opal/util/output.h"
#include "opal/mca/base/base.h"
#include "opal/mca/base/mca_base_param.h"
#include "orte/include/orte_constants.h"
#include "orte/util/proc_info.h"
#include "orte/mca/ras/dash_host/ras_dash_host.h"
/*
* Local functions
*/
static int orte_ras_dash_host_open(void);
orte_ras_dash_host_component_t mca_ras_dash_host_component = {
{
/* First, the mca_base_component_t struct containing meta
information about the component itself */
{
/* Indicate that we are a iof v1.0.0 component (which also
implies a specific MCA version) */
ORTE_RAS_BASE_VERSION_1_0_0,
"dash_host", /* MCA component name */
ORTE_MAJOR_VERSION, /* MCA component major version */
ORTE_MINOR_VERSION, /* MCA component minor version */
ORTE_RELEASE_VERSION, /* MCA component release version */
orte_ras_dash_host_open, /* component open */
NULL
},
/* Next the MCA v1.0.0 component meta data */
{
/* Whether the component is checkpointable or not */
false
},
orte_ras_dash_host_init
}
};
/**
* component open function
*/
static int orte_ras_dash_host_open(void)
{
mca_base_param_reg_int(&mca_ras_dash_host_component.super.ras_version,
"priority",
"Selection priority for the dash_host RAS component",
false, false, 5,
&mca_ras_dash_host_component.priority);
return ORTE_SUCCESS;
}

orte/mca/ras/dash_host/ras_dash_host_module.c (new file, 208 lines)

@ -0,0 +1,208 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "opal/util/output.h"
#include "opal/util/argv.h"
#include "orte/include/orte_constants.h"
#include "orte/include/orte_types.h"
#include "orte/mca/ras/base/base.h"
#include "orte/mca/ras/base/ras_base_node.h"
#include "orte/mca/rmgr/base/base.h"
#include "orte/mca/ras/base/ras_base_node.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/ras/dash_host/ras_dash_host.h"
/*
* Local functions
*/
static int orte_ras_dash_host_allocate(orte_jobid_t jobid);
static int orte_ras_dash_host_deallocate(orte_jobid_t jobid);
static int orte_ras_dash_host_finalize(void);
/*
* Local variables
*/
orte_ras_base_module_t orte_ras_dash_host_module = {
orte_ras_dash_host_allocate,
orte_ras_base_node_insert,
orte_ras_base_node_query,
orte_ras_dash_host_deallocate,
orte_ras_dash_host_finalize
};
orte_ras_base_module_t *orte_ras_dash_host_init(int* priority)
{
*priority = mca_ras_dash_host_component.priority;
return &orte_ras_dash_host_module;
}
static int orte_ras_dash_host_allocate(orte_jobid_t jobid)
{
opal_list_t nodes;
opal_list_item_t* item;
orte_app_context_t **context;
size_t i, j, k, num_context;
int rc;
char **mapped_nodes = NULL, **mini_map;
orte_ras_node_t *node;
bool empty;
/* If the node segment is not empty, do nothing */
if (ORTE_SUCCESS != (rc = orte_ras_base_node_segment_empty(&empty))) {
ORTE_ERROR_LOG(rc);
return rc;
}
if (!empty) {
opal_output(orte_ras_base.ras_output,
"orte:ras:dash_host: node segment not empty; not doing anything");
return ORTE_SUCCESS;
}
/* Otherwise, get the context */
rc = orte_rmgr_base_get_app_context(jobid, &context, &num_context);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
return rc;
}
OBJ_CONSTRUCT(&nodes, opal_list_t);
/* If there's nothing to do, skip to the end */
if (0 == num_context) {
rc = ORTE_SUCCESS;
goto cleanup;
}
/* Otherwise, go through the contexts */
for (i = 0; i < num_context; ++i) {
if (context[i]->num_map > 0) {
orte_app_context_map_t** map = context[i]->map_data;
/* Accumulate all of the host name mappings */
for (j = 0; j < context[i]->num_map; ++j) {
if (ORTE_APP_CONTEXT_MAP_HOSTNAME == map[j]->map_type) {
mini_map = opal_argv_split(map[j]->map_data, ',');
if (mapped_nodes == NULL) {
mapped_nodes = mini_map;
} else {
for (k = 0; NULL != mini_map[k]; ++k) {
rc = opal_argv_append_nosize(&mapped_nodes,
mini_map[k]);
if (OPAL_SUCCESS != rc) {
goto cleanup;
}
}
}
}
}
}
}
/* Did we find anything? */
if (NULL != mapped_nodes) {
/* Go through the names found and add them to the host list.
If they're not unique, then bump the slots count for each
duplicate */
for (i = 0; NULL != mapped_nodes[i]; ++i) {
for (item = opal_list_get_begin(&nodes);
item != opal_list_get_end(&nodes);
item = opal_list_get_next(item)) {
node = (orte_ras_node_t*) item;
if (0 == strcmp(node->node_name, mapped_nodes[i])) {
++node->node_slots;
break;
}
}
/* If we didn't find it, add it to the list */
if (item == opal_list_get_end(&nodes)) {
node = OBJ_NEW(orte_ras_node_t);
if (NULL == node) {
return ORTE_ERR_OUT_OF_RESOURCE;
}
node->node_name = strdup(mapped_nodes[i]);
node->node_arch = NULL;
node->node_state = ORTE_NODE_STATE_UP;
/* JMS: this should not be hard-wired to 0, but there's no
other value to put it to [yet]... */
node->node_cellid = 0;
node->node_slots_inuse = 0;
node->node_slots_max = 0;
node->node_slots = 1;
opal_list_append(&nodes, &node->super);
}
}
/* Put them on the segment and allocate them */
if (ORTE_SUCCESS !=
(rc = orte_ras_base_node_insert(&nodes)) ||
ORTE_SUCCESS !=
(rc = orte_ras_base_allocate_nodes(jobid, &nodes))) {
goto cleanup;
}
}
cleanup:
if (NULL != mapped_nodes) {
opal_argv_free(mapped_nodes);
}
while (NULL != (item = opal_list_remove_first(&nodes))) {
OBJ_RELEASE(item);
}
OBJ_DESTRUCT(&nodes);
for (i = 0; i < num_context; i++) {
OBJ_RELEASE(context[i]);
}
free(context);
return rc;
}
static int orte_ras_dash_host_deallocate(orte_jobid_t jobid)
{
/* Nothing to do */
opal_output(orte_ras_base.ras_output,
"ras:dash_host:deallocate: success (nothing to do)");
return ORTE_SUCCESS;
}
static int orte_ras_dash_host_finalize(void)
{
/* Nothing to do */
opal_output(orte_ras_base.ras_output,
"ras:dash_host:finalize: success (nothing to do)");
return ORTE_SUCCESS;
}


@ -1,111 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "include/orte_constants.h"
#include "include/orte_types.h"
#include "mca/ras/base/base.h"
#include "mca/ras/base/ras_base_node.h"
#include "mca/ras/host/ras_host.h"
#include "mca/rmgr/base/base.h"
#include "mca/ras/base/ras_base_node.h"
#include "mca/errmgr/errmgr.h"
#if HAVE_STRING_H
#include <string.h>
#endif /* HAVE_STRING_H */
/**
* Discover available (pre-allocated) nodes. Allocate the
* requested number of nodes/process slots to the job.
*
*/
static int orte_ras_host_allocate(orte_jobid_t jobid)
{
opal_list_t nodes;
opal_list_item_t* item;
orte_app_context_t **context;
size_t i, num_context;
bool constrained;
int rc;
rc = orte_rmgr_base_get_app_context(jobid, &context, &num_context);
if(ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
return rc;
}
OBJ_CONSTRUCT(&nodes, opal_list_t);
if(ORTE_SUCCESS != (rc = orte_ras_base_node_query_context(
&nodes, context, num_context, &constrained))) {
ORTE_ERROR_LOG(rc);
return rc;
}
if (0 == strcmp(mca_ras_host_component.schedule_policy, "node")) {
if (ORTE_SUCCESS !=
(rc = orte_ras_base_allocate_nodes_by_node(jobid, &nodes))) {
goto cleanup;
}
} else {
if (ORTE_SUCCESS !=
(rc = orte_ras_base_allocate_nodes_by_slot(jobid, &nodes))) {
goto cleanup;
}
}
cleanup:
while(NULL != (item = opal_list_remove_first(&nodes))) {
OBJ_RELEASE(item);
}
OBJ_DESTRUCT(&nodes);
for(i=0; i<num_context; i++)
OBJ_RELEASE(context[i]);
free(context);
return rc;
}
static int orte_ras_host_node_insert(opal_list_t *nodes)
{
return orte_ras_base_node_insert(nodes);
}
static int orte_ras_host_node_query(opal_list_t *nodes)
{
return orte_ras_base_node_query(nodes);
}
static int orte_ras_host_deallocate(orte_jobid_t jobid)
{
return ORTE_SUCCESS;
}
static int orte_ras_host_finalize(void)
{
return ORTE_SUCCESS;
}
orte_ras_base_module_t orte_ras_host_module = {
orte_ras_host_allocate,
orte_ras_host_node_insert,
orte_ras_host_node_query,
orte_ras_host_deallocate,
orte_ras_host_finalize
};


@ -1,116 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "include/orte_constants.h"
#include "mca/base/base.h"
#include "mca/base/mca_base_param.h"
#include "util/proc_info.h"
#include "opal/util/output.h"
#include "mca/ras/host/ras_host.h"
/*
* Local functions
*/
static int orte_ras_host_open(void);
static int orte_ras_host_close(void);
static orte_ras_base_module_t* orte_ras_host_init(int*);
orte_ras_host_component_t mca_ras_host_component = {
{
/* First, the mca_base_component_t struct containing meta
information about the component itself */
{
/* Indicate that we are a iof v1.0.0 component (which also
implies a specific MCA version) */
ORTE_RAS_BASE_VERSION_1_0_0,
"host", /* MCA component name */
ORTE_MAJOR_VERSION, /* MCA component major version */
ORTE_MINOR_VERSION, /* MCA component minor version */
ORTE_RELEASE_VERSION, /* MCA component release version */
orte_ras_host_open, /* component open */
orte_ras_host_close /* component close */
},
/* Next the MCA v1.0.0 component meta data */
{
/* Whether the component is checkpointable or not */
false
},
orte_ras_host_init
}
};
/**
* component open/close/init function
*/
static int orte_ras_host_open(void)
{
size_t id;
mca_base_param_reg_int(&mca_ras_host_component.super.ras_version, "debug",
"Toggle debug output for Host RAS component",
false, false, 1,
&mca_ras_host_component.debug);
mca_base_param_reg_int(&mca_ras_host_component.super.ras_version, "debug",
"Selection priority for the Host RAS component",
false, false, 1,
&mca_ras_host_component.priority);
/* JMS To be changed post-beta to LAM's C/N command line notation */
id = mca_base_param_find("ras_base", NULL, "schedule_policy");
if (0 > id) {
id = mca_base_param_reg_string_name("ras_base", "schedule_policy",
"Scheduling Policy for RAS. [slot | node]",
false, false, "slot",
&mca_ras_host_component.schedule_policy);
}
else {
mca_base_param_lookup_string(id, &mca_ras_host_component.schedule_policy);
}
return ORTE_SUCCESS;
}
static orte_ras_base_module_t *orte_ras_host_init(int* priority)
{
*priority = mca_ras_host_component.priority;
return &orte_ras_host_module;
}
/**
* Close all subsystems.
*/
static int orte_ras_host_close(void)
{
if (NULL != mca_ras_host_component.schedule_policy) {
free(mca_ras_host_component.schedule_policy);
}
return ORTE_SUCCESS;
}


@ -14,36 +14,32 @@
# $HEADER$
#
# Use the top-level Makefile.options
# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).
if OMPI_BUILD_ras_host_DSO
if OMPI_BUILD_ras_hostfile_DSO
component_noinst =
component_install = mca_ras_host.la
component_install = mca_ras_hostfile.la
else
component_noinst = libmca_ras_host.la
component_noinst = libmca_ras_hostfile.la
component_install =
endif
host_SOURCES = \
ras_host.c \
ras_host.h \
ras_host_component.c
hostfile_SOURCES = \
ras_hostfile.h \
ras_hostfile_module.c \
ras_hostfile_component.c
mcacomponentdir = $(libdir)/openmpi
mcacomponent_LTLIBRARIES = $(component_install)
mca_ras_host_la_SOURCES = $(host_SOURCES)
mca_ras_host_la_LIBADD = \
mca_ras_hostfile_la_SOURCES = $(hostfile_SOURCES)
mca_ras_hostfile_la_LIBADD = \
$(top_ompi_builddir)/orte/liborte.la \
$(top_ompi_builddir)/opal/libopal.la
mca_ras_host_la_LDFLAGS = -module -avoid-version
mca_ras_hostfile_la_LDFLAGS = -module -avoid-version
noinst_LTLIBRARIES = $(component_noinst)
libmca_ras_host_la_SOURCES = $(host_SOURCES)
libmca_ras_host_la_LIBADD =
libmca_ras_host_la_LDFLAGS = -module -avoid-version
libmca_ras_hostfile_la_SOURCES = $(hostfile_SOURCES)
libmca_ras_hostfile_la_LIBADD =
libmca_ras_hostfile_la_LDFLAGS = -module -avoid-version


@ -17,5 +17,5 @@
# Specific to this module
PARAM_INIT_FILE=ras_host.c
PARAM_INIT_FILE=ras_hostfile_component.c
PARAM_CONFIG_FILES="Makefile"

orte/mca/ras/hostfile/ras_hostfile.h (new file, 57 lines)

@ -0,0 +1,57 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*
* Resource Allocation (hostfile)
*/
#ifndef ORTE_RAS_HOSTFILE_H
#define ORTE_RAS_HOSTFILE_H
#include "mca/ras/ras.h"
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
/**
* hostfile-specific RAS component struct
*/
struct orte_ras_hostfile_component_t {
/** Base RAS component */
orte_ras_base_component_t super;
/** What's the priority of this component */
int priority;
};
/**
* Convenience typedef
*/
typedef struct orte_ras_hostfile_component_t orte_ras_hostfile_component_t;
/**
* Component export structure
*/
OMPI_COMP_EXPORT extern orte_ras_hostfile_component_t mca_ras_hostfile_component;
/**
* Module init function
*/
orte_ras_base_module_t *orte_ras_hostfile_init(int* priority);
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif
#endif

orte/mca/ras/hostfile/ras_hostfile_component.c (new file, 75 lines)

@ -0,0 +1,75 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "opal/util/output.h"
#include "opal/mca/base/base.h"
#include "opal/mca/base/mca_base_param.h"
#include "orte/include/orte_constants.h"
#include "orte/util/proc_info.h"
#include "orte/mca/ras/hostfile/ras_hostfile.h"
/*
* Local functions
*/
static int orte_ras_hostfile_open(void);
orte_ras_hostfile_component_t mca_ras_hostfile_component = {
{
/* First, the mca_base_component_t struct containing meta
information about the component itself */
{
/* Indicate that we are a iof v1.0.0 component (which also
implies a specific MCA version) */
ORTE_RAS_BASE_VERSION_1_0_0,
"hostfile", /* MCA component name */
ORTE_MAJOR_VERSION, /* MCA component major version */
ORTE_MINOR_VERSION, /* MCA component minor version */
ORTE_RELEASE_VERSION, /* MCA component release version */
orte_ras_hostfile_open, /* component open */
NULL
},
/* Next the MCA v1.0.0 component meta data */
{
/* Whether the component is checkpointable or not */
false
},
orte_ras_hostfile_init
}
};
/**
* component open function
*/
static int orte_ras_hostfile_open(void)
{
mca_base_param_reg_int(&mca_ras_hostfile_component.super.ras_version,
"priority",
"Selection priority for the hostfile RAS component",
false, false, 10,
&mca_ras_hostfile_component.priority);
return ORTE_SUCCESS;
}

130
orte/mca/ras/hostfile/ras_hostfile_module.c Normal file
View file

@ -0,0 +1,130 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "opal/util/output.h"
#include "opal/util/argv.h"
#include "orte/include/orte_constants.h"
#include "orte/include/orte_types.h"
#include "orte/mca/ras/base/base.h"
#include "orte/mca/ras/base/ras_base_node.h"
#include "orte/mca/rmgr/base/base.h"
#include "orte/mca/ras/base/ras_base_node.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/ras/hostfile/ras_hostfile.h"
/*
* Local functions
*/
static int orte_ras_hostfile_allocate(orte_jobid_t jobid);
static int orte_ras_hostfile_deallocate(orte_jobid_t jobid);
static int orte_ras_hostfile_finalize(void);
/*
* Local variables
*/
orte_ras_base_module_t orte_ras_hostfile_module = {
orte_ras_hostfile_allocate,
orte_ras_base_node_insert,
orte_ras_base_node_query,
orte_ras_hostfile_deallocate,
orte_ras_hostfile_finalize
};
orte_ras_base_module_t *orte_ras_hostfile_init(int* priority)
{
*priority = mca_ras_hostfile_component.priority;
return &orte_ras_hostfile_module;
}
/*
* THIS FUNCTION NEEDS TO CHANGE POST-1.0.
*
* After 1.0, this function, and the rds/hostfile need to change to
* clean up properly. They're not "broken" right now, so we're not
* fixing them. But they're implemented wrong, so they should be
* adapted to the model that they're supposed to implement, not the
* workarounds that they currently have. The end result will be much,
* much cleaner.
*
* Specifically, the rds/hostfile currently puts all of its nodes on
* the resource segment *and* the node segment. It should not. It
* should only put its nodes on the resource segment, appropriately
* tagged that they came from a hostfile. The ras/hostfile should
* then examine the resources segment and pull out all nodes that came
* from a hostfile and put them on the nodes segment.
*/
static int orte_ras_hostfile_allocate(orte_jobid_t jobid)
{
opal_list_t nodes;
opal_list_item_t* item;
int rc;
OBJ_CONSTRUCT(&nodes, opal_list_t);
/* Query for all nodes in the node segment that have been
allocated to this job */
if (ORTE_SUCCESS != (rc = orte_ras_base_node_query_alloc(&nodes, jobid))) {
goto cleanup;
}
/* If there are nodes allocated, then query for *all* nodes */
if (opal_list_is_empty(&nodes)) {
if (ORTE_SUCCESS != (rc = orte_ras_base_node_query(&nodes))) {
goto cleanup;
}
/* If there are any nodes at all, allocate them all to this job */
if (!opal_list_is_empty(&nodes)) {
rc = orte_ras_base_allocate_nodes(jobid, &nodes);
goto cleanup;
}
}
cleanup:
while (NULL != (item = opal_list_remove_first(&nodes))) {
OBJ_RELEASE(item);
}
OBJ_DESTRUCT(&nodes);
return rc;
}
static int orte_ras_hostfile_deallocate(orte_jobid_t jobid)
{
/* Nothing to do */
opal_output(orte_ras_base.ras_output,
"ras:hostfile:deallocate: success (nothing to do)");
return ORTE_SUCCESS;
}
static int orte_ras_hostfile_finalize(void)
{
/* Nothing to do */
opal_output(orte_ras_base.ras_output,
"ras:hostfile:finalize: success (nothing to do)");
return ORTE_SUCCESS;
}
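
For readers new to the RAS framework, here is a minimal, hypothetical sketch (not part of this commit) of how a caller could obtain this module through its init function and drive the allocate hook; in practice the ras base selection logic does this for the caller after comparing the priorities reported by each component:

    #include "orte/include/orte_constants.h"
    #include "orte/mca/ras/base/base.h"
    #include "orte/mca/ras/hostfile/ras_hostfile.h"

    /* Hypothetical helper: ask the hostfile component for its module and
     * priority, then run its allocate hook for one job. */
    static int try_hostfile_allocate(orte_jobid_t jobid)
    {
        int priority = 0;
        orte_ras_base_module_t *module = orte_ras_hostfile_init(&priority);

        if (NULL == module) {
            return ORTE_ERR_NOT_FOUND;   /* component declined to run */
        }
        /* the module struct above exposes allocate, node_insert, node_query,
         * deallocate, and finalize */
        return module->allocate(jobid);
    }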

45
orte/mca/ras/localhost/Makefile.am Normal file
View file

@ -0,0 +1,45 @@
#
# Copyright (c) 2004-2005 The Trustees of Indiana University.
# All rights reserved.
# Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
# All rights reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).
if OMPI_BUILD_ras_localhost_DSO
component_noinst =
component_install = mca_ras_localhost.la
else
component_noinst = libmca_ras_localhost.la
component_install =
endif
localhost_SOURCES = \
ras_localhost.h \
ras_localhost_module.c \
ras_localhost_component.c
mcacomponentdir = $(libdir)/openmpi
mcacomponent_LTLIBRARIES = $(component_install)
mca_ras_localhost_la_SOURCES = $(localhost_SOURCES)
mca_ras_localhost_la_LIBADD = \
$(top_ompi_builddir)/orte/liborte.la \
$(top_ompi_builddir)/opal/libopal.la
mca_ras_localhost_la_LDFLAGS = -module -avoid-version
noinst_LTLIBRARIES = $(component_noinst)
libmca_ras_localhost_la_SOURCES = $(localhost_SOURCES)
libmca_ras_localhost_la_LIBADD =
libmca_ras_localhost_la_LDFLAGS = -module -avoid-version

21
orte/mca/ras/localhost/configure.params Normal file
View file

@ -0,0 +1,21 @@
# -*- shell-script -*-
#
# Copyright (c) 2004-2005 The Trustees of Indiana University.
# All rights reserved.
# Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
# All rights reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# Specific to this module
PARAM_INIT_FILE=ras_localhost_component.c
PARAM_CONFIG_FILES="Makefile"

View file

@ -18,30 +18,38 @@
*
* Resource Allocation (Host)
*/
#ifndef ORTE_RAS_HOST_H
#define ORTE_RAS_HOST_H
#ifndef ORTE_RAS_LOCALHOST_H
#define ORTE_RAS_LOCALHOST_H
#include "mca/ras/ras.h"
#if defined(c_plusplus) || defined(__cplusplus)
extern "C" {
#endif
/**
* Localhost-specific RAS component struct
*/
struct orte_ras_localhost_component_t {
/** Base RAS component */
orte_ras_base_component_t super;
/** What's the priority of this component */
int priority;
};
/**
* Convenience typedef
*/
typedef struct orte_ras_localhost_component_t orte_ras_localhost_component_t;
/**
* RAS Component
*/
struct orte_ras_host_component_t {
orte_ras_base_component_t super;
int debug;
int priority;
char *schedule_policy;
};
typedef struct orte_ras_host_component_t orte_ras_host_component_t;
OMPI_COMP_EXPORT extern orte_ras_host_component_t mca_ras_host_component;
OMPI_COMP_EXPORT extern orte_ras_base_module_t orte_ras_host_module;
/**
* Component export structure
*/
OMPI_COMP_EXPORT extern orte_ras_localhost_component_t mca_ras_localhost_component;
/**
* Module init function
*/
orte_ras_base_module_t *orte_ras_localhost_init(int* priority);
#if defined(c_plusplus) || defined(__cplusplus)
}
#endif

View file

@ -0,0 +1,75 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "opal/util/output.h"
#include "opal/mca/base/base.h"
#include "opal/mca/base/mca_base_param.h"
#include "orte/include/orte_constants.h"
#include "orte/util/proc_info.h"
#include "orte/mca/ras/localhost/ras_localhost.h"
/*
* Local functions
*/
static int orte_ras_localhost_open(void);
orte_ras_localhost_component_t mca_ras_localhost_component = {
{
/* First, the mca_base_component_t struct containing meta
information about the component itself */
{
/* Indicate that we are a ras v1.0.0 component (which also
implies a specific MCA version) */
ORTE_RAS_BASE_VERSION_1_0_0,
"localhost", /* MCA component name */
ORTE_MAJOR_VERSION, /* MCA component major version */
ORTE_MINOR_VERSION, /* MCA component minor version */
ORTE_RELEASE_VERSION, /* MCA component release version */
orte_ras_localhost_open, /* component open */
NULL
},
/* Next the MCA v1.0.0 component meta data */
{
/* Whether the component is checkpointable or not */
false
},
orte_ras_localhost_init
}
};
/**
* component open function
*/
static int orte_ras_localhost_open(void)
{
mca_base_param_reg_int(&mca_ras_localhost_component.super.ras_version,
"priority",
"Selection priority for the localhost RAS component",
false, false, 0,
&mca_ras_localhost_component.priority);
return ORTE_SUCCESS;
}
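
Because the parameter above is registered against the component's version struct, it typically surfaces to users as "ras_localhost_priority". A hedged sketch (not part of this commit, and assuming the usual mca_base_param_find()/mca_base_param_lookup_int() pair from this code base's MCA parameter API) of reading the value back elsewhere:

    #include "opal/mca/base/mca_base_param.h"

    /* Hypothetical helper: read the current value of ras_localhost_priority. */
    static int lookup_localhost_priority(void)
    {
        int value = 0;
        int index = mca_base_param_find("ras", "localhost", "priority");
        if (index >= 0) {
            mca_base_param_lookup_int(index, &value);
        }
        return value;
    }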

136
orte/mca/ras/localhost/ras_localhost_module.c Normal file
View file

@ -0,0 +1,136 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
* All rights reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "opal/class/opal_list.h"
#include "opal/util/output.h"
#include "orte/include/orte_constants.h"
#include "orte/include/orte_types.h"
#include "orte/mca/ras/base/base.h"
#include "orte/mca/ras/base/ras_base_node.h"
#include "orte/mca/rmgr/base/base.h"
#include "orte/mca/ras/base/ras_base_node.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/ras/localhost/ras_localhost.h"
/*
* Local functions
*/
static int orte_ras_localhost_allocate(orte_jobid_t jobid);
static int orte_ras_localhost_deallocate(orte_jobid_t jobid);
static int orte_ras_localhost_finalize(void);
/*
* Local variables
*/
orte_ras_base_module_t orte_ras_localhost_module = {
orte_ras_localhost_allocate,
orte_ras_base_node_insert,
orte_ras_base_node_query,
orte_ras_localhost_deallocate,
orte_ras_localhost_finalize
};
orte_ras_base_module_t *orte_ras_localhost_init(int* priority)
{
*priority = mca_ras_localhost_component.priority;
return &orte_ras_localhost_module;
}
static int orte_ras_localhost_allocate(orte_jobid_t jobid)
{
bool empty;
int ret;
opal_list_t nodes;
orte_ras_node_t *node;
opal_list_item_t *item;
/* If the node segment is not empty, do nothing */
if (ORTE_SUCCESS != (ret = orte_ras_base_node_segment_empty(&empty))) {
ORTE_ERROR_LOG(ret);
return ret;
}
if (!empty) {
opal_output(orte_ras_base.ras_output,
"orte:ras:localhost: node segment not empty; not doing anything");
return ORTE_SUCCESS;
}
opal_output(orte_ras_base.ras_output,
"orte:ras:localhost: node segment empty; adding \"localhost\"");
/* Ok, the node segment is empty -- so add a localhost node */
node = OBJ_NEW(orte_ras_node_t);
if (NULL == node) {
return ORTE_ERR_OUT_OF_RESOURCE;
}
node->node_name = strdup("localhost");
node->node_arch = NULL;
node->node_state = ORTE_NODE_STATE_UP;
/* JMS: this should not be hard-wired to 0, but there's no
other value to put it to [yet]... */
node->node_cellid = 0;
node->node_slots_inuse = 0;
node->node_slots_max = 0;
node->node_slots = 1;
OBJ_CONSTRUCT(&nodes, opal_list_t);
opal_list_append(&nodes, &node->super);
/* Put it on the segment and allocate it */
if (ORTE_SUCCESS !=
(ret = orte_ras_base_node_insert(&nodes)) ||
ORTE_SUCCESS !=
(ret = orte_ras_base_allocate_nodes(jobid, &nodes))) {
goto cleanup;
}
cleanup:
item = opal_list_remove_first(&nodes);
OBJ_RELEASE(item);
OBJ_DESTRUCT(&nodes);
/* All done */
return ret;
}
static int orte_ras_localhost_deallocate(orte_jobid_t jobid)
{
/* Nothing to do */
opal_output(orte_ras_base.ras_output,
"ras:localhost:deallocate: success (nothing to do)");
return ORTE_SUCCESS;
}
static int orte_ras_localhost_finalize(void)
{
/* Nothing to do */
opal_output(orte_ras_base.ras_output,
"ras:localhost:finalize: success (nothing to do)");
return ORTE_SUCCESS;
}
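
A note on selection: this component registers a default priority of 0 (see ras_localhost_component.c above), while the hostfile component defaults to 10, so under normal priority-based selection the localhost module is consulted last and only inserts its single "localhost" node when the node segment is still empty.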

View file

@ -55,7 +55,6 @@ orte_ras_base_module_t orte_ras_slurm_module = {
finalize
};
/**
* Discover available (pre-allocated) nodes. Allocate the
* requested number of nodes/process slots to the job.
@ -81,7 +80,8 @@ static int allocate(orte_jobid_t jobid)
"ras:slurm:allocate: discover failed!");
return ret;
}
ret = orte_ras_base_allocate_nodes_by_slot(jobid, &nodes);
ret = orte_ras_base_allocate_nodes(jobid, &nodes);
while (NULL != (item = opal_list_remove_first(&nodes))) {
OBJ_RELEASE(item);

View file

@ -84,7 +84,7 @@ static int allocate(orte_jobid_t jobid)
tm_finalize();
return ret;
}
ret = orte_ras_base_allocate_nodes_by_slot(jobid, &nodes);
ret = orte_ras_base_allocate_nodes(jobid, &nodes);
while (NULL != (item = opal_list_remove_first(&nodes))) {
OBJ_RELEASE(item);

View file

@ -71,7 +71,7 @@ static int allocate(orte_jobid_t jobid)
"ras:xgrid:allocate: discover failed!");
return ret;
}
ret = orte_ras_base_allocate_nodes_by_node(jobid, &nodes);
ret = orte_ras_base_allocate_nodes(jobid, &nodes);
while (NULL != (item = opal_list_remove_first(&nodes))) {
OBJ_RELEASE(item);

View file

@ -20,20 +20,21 @@
#include <errno.h>
#include <string.h>
#include "include/orte_constants.h"
#include "opal/class/opal_list.h"
#include "opal/util/output.h"
#include "opal/util/argv.h"
#include "util/sys_info.h"
#include "mca/mca.h"
#include "mca/base/base.h"
#include "mca/ns/ns.h"
#include "mca/errmgr/errmgr.h"
#include "mca/ras/ras.h"
#include "mca/rds/rds.h"
#include "mca/rds/base/base.h"
#include "mca/rds/hostfile/rds_hostfile.h"
#include "mca/rds/hostfile/rds_hostfile_lex.h"
#include "opal/mca/mca.h"
#include "opal/mca/base/base.h"
#include "orte/include/orte_constants.h"
#include "orte/util/sys_info.h"
#include "orte/mca/ns/ns.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/ras/ras.h"
#include "orte/mca/ras/base/ras_base_node.h"
#include "orte/mca/rds/rds.h"
#include "orte/mca/rds/base/base.h"
#include "orte/mca/rds/hostfile/rds_hostfile.h"
#include "orte/mca/rds/hostfile/rds_hostfile_lex.h"
#include "runtime/runtime_types.h"
@ -334,7 +335,7 @@ static int orte_rds_hostfile_query(void)
OBJ_CONSTRUCT(&existing, opal_list_t);
OBJ_CONSTRUCT(&updates, opal_list_t);
OBJ_CONSTRUCT(&rds_updates, opal_list_t);
rc = orte_ras.node_query(&existing);
rc = orte_ras_base_node_query(&existing);
if(ORTE_SUCCESS != rc) {
goto cleanup;
}
@ -418,7 +419,7 @@ static int orte_rds_hostfile_query(void)
* resources listed in the hostfile have been
* already allocated for our use.
*/
rc = orte_ras.node_insert(&updates);
rc = orte_ras_base_node_insert(&updates);
if (ORTE_SUCCESS != rc) {
goto cleanup;
}

View file

@ -195,6 +195,94 @@ orte_rmaps_lookup_node(opal_list_t* nodes, char* node_name, orte_rmaps_base_proc
return node;
}
/**
* Obtain the mapping for this job, and the list of nodes confined to that mapping.
*
* Use this instead of orte_ras_base_node_query when past the RMAPS framework
* since components like the PLS are only concerned with those nodes that they
* have been mapped onto, not all of the nodes allocated to their job. In the case
* where we are allocated 10 nodes from the RAS but only map onto 2 of them,
* we don't try to launch orteds on all 10 nodes, just the 2 that were mapped.
*/
int orte_rmaps_base_mapped_node_query(opal_list_t* mapping_list, opal_list_t* nodes_alloc, orte_jobid_t jobid) {
opal_list_t nodes;
opal_list_item_t *item_a, *item_m, *item_n;
int num_mapping = 0;
int rc = ORTE_SUCCESS;
bool matched = false;
/* get all nodes allocated to this job */
OBJ_CONSTRUCT(&nodes, opal_list_t);
rc = orte_ras_base_node_query_alloc(&nodes, jobid);
if (ORTE_SUCCESS != rc) {
goto cleanup;
}
/* get the mapping for this job */
rc = orte_rmaps_base_get_map(jobid, mapping_list);
if (ORTE_SUCCESS != rc) {
goto cleanup;
}
num_mapping = opal_list_get_size(mapping_list);
/* Create a list of nodes that are in the mapping */
for( item_m = opal_list_get_first(mapping_list);
item_m != opal_list_get_end(mapping_list);
item_m = opal_list_get_next(item_m)) {
orte_rmaps_base_map_t* map = (orte_rmaps_base_map_t*)item_m;
/* Iterate over all the nodes mapped and check them against the
* allocated node list */
for( item_n = opal_list_get_first(&(map->nodes));
item_n != opal_list_get_end(&(map->nodes));
item_n = opal_list_get_next(item_n)) {
matched = false;
/* If this node is in the list already, skip it */
for( item_a = opal_list_get_first(nodes_alloc);
item_a != opal_list_get_end(nodes_alloc);
item_a = opal_list_get_next(item_a)) {
if( 0 == strcmp( ((orte_ras_node_t*) item_a)->node_name,
((orte_rmaps_base_node_t*) item_n)->node_name) ) {
matched = true;
break;
}
}
if(matched){
continue;
}
/* Otherwise
* - Find it in the node list from the node segment,
* - Add it to the allocated list of nodes
*/
matched = false;
for( item_a = opal_list_get_first(&nodes);
item_a != opal_list_get_end(&nodes);
item_a = opal_list_get_next(item_a)) {
if( 0 == strcmp( ((orte_ras_node_t*) item_a)->node_name,
((orte_rmaps_base_node_t*) item_n)->node_name) ) {
matched = true;
break;
}
}
if(!matched) {
    printf("Unable to find the mapped node in the allocation. This should never happen\n");
    rc = ORTE_ERROR;
    goto cleanup;
}
opal_list_remove_item(&nodes, item_a);
opal_list_append(nodes_alloc, item_a);
}
}
cleanup:
while (NULL != (item_a = opal_list_remove_first(&nodes))) {
OBJ_RELEASE(item_a);
}
return rc;
}
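
To make the caller pattern described above concrete, here is a hedged sketch (not part of this commit; the include paths and the daemon-launch step are illustrative only) of how a PLS might use the new query to launch only on the mapped subset of the allocation:

    #include "opal/class/opal_list.h"
    #include "orte/include/orte_constants.h"
    #include "orte/mca/ras/base/ras_base_node.h"
    #include "orte/mca/rmaps/base/base.h"

    static int launch_on_mapped_nodes(orte_jobid_t jobid)
    {
        opal_list_t mapping_list, nodes_alloc;
        opal_list_item_t *item;
        int rc;

        OBJ_CONSTRUCT(&mapping_list, opal_list_t);
        OBJ_CONSTRUCT(&nodes_alloc, opal_list_t);

        rc = orte_rmaps_base_mapped_node_query(&mapping_list, &nodes_alloc, jobid);
        if (ORTE_SUCCESS == rc) {
            for (item  = opal_list_get_first(&nodes_alloc);
                 item != opal_list_get_end(&nodes_alloc);
                 item  = opal_list_get_next(item)) {
                orte_ras_node_t *node = (orte_ras_node_t*) item;
                /* ... start a daemon on node->node_name (placeholder) ... */
                (void) node;
            }
        }

        while (NULL != (item = opal_list_remove_first(&nodes_alloc))) {
            OBJ_RELEASE(item);
        }
        OBJ_DESTRUCT(&nodes_alloc);
        /* items on mapping_list would be released the same way */
        OBJ_DESTRUCT(&mapping_list);
        return rc;
    }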
/**
* Query the process mapping from the registry.

View file

@ -97,7 +97,7 @@ typedef struct orte_rmaps_base_map_t orte_rmaps_base_map_t;
OBJ_CLASS_DECLARATION(orte_rmaps_base_map_t);
int orte_rmaps_base_mapped_node_query(opal_list_t* mapping_list, opal_list_t* nodes_alloc, orte_jobid_t jobid);
int orte_rmaps_base_get_map(orte_jobid_t, opal_list_t* mapping);
int orte_rmaps_base_set_map(orte_jobid_t, opal_list_t* mapping);
int orte_rmaps_base_get_node_map(orte_cellid_t, orte_jobid_t, const char*, opal_list_t* mapping);

View file

@ -61,6 +61,7 @@ int orte_rmaps_base_open(void)
orte_rmaps_base_module_t *module;
int param, priority, value;
orte_rmaps_base_cmp_t *cmp;
char *policy;
/* Debugging / verbose output */
@ -73,6 +74,15 @@ int orte_rmaps_base_open(void)
orte_rmaps_base.rmaps_output = -1;
}
/* Are we scheduling by node or by slot? */
param = mca_base_param_reg_string_name("rmaps_base", "schedule_policy",
"Scheduling Policy for RMAPS. [slot | node]",
false, false, "slot", &policy);
if (0 == strcmp(policy, "node")) {
mca_base_param_set_string(param, "node");
}
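/* Usage note (editorial, not part of this hunk): with this parameter
 * registered, by-node mapping can be requested either through orterun's
 * by_node/by_slot handling (see the orterun.c change later in this diff)
 * or by setting "rmaps_base_schedule_policy" to "node" directly, e.g. via
 * orterun's -mca option. */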
/* Open up all the components that we can find */
if (ORTE_SUCCESS !=

View file

@ -21,4 +21,20 @@
Either request fewer slots for your application, or make more slots available
for use.
[orte-rmaps-rr:no-mapped-node]
There are no allocated resources for the application
%s
that match the requested mapping:
%s
Verify that you have mapped the allocated resources properly using the
--host specification.
[orte-rmaps-rr:not-all-mapped-alloc]
Some of the requested hosts are not included in the current allocation for the
application:
%s
The requested hosts were:
%s
Verify that you have mapped the allocated resources properly using the
--host specification.

View file

@ -22,6 +22,7 @@
#include "include/orte_types.h"
#include "opal/util/output.h"
#include "opal/util/show_help.h"
#include "opal/util/argv.h"
#include "mca/ns/ns.h"
#include "mca/gpr/gpr.h"
#include "mca/rmaps/base/base.h"
@ -36,9 +37,108 @@
*/
static opal_list_item_t *cur_node_item = NULL;
/*
* A sanity check to ensure that all of the requested nodes are actually
* allocated to this application.
*/
static bool are_all_mapped_valid(char **mapping,
int num_mapped,
opal_list_t* nodes)
{
opal_list_item_t *item;
int i;
bool matched;
for (i = 0; i < num_mapped; ++i) {
matched = false;
for(item = opal_list_get_first(nodes);
item != opal_list_get_end(nodes);
item = opal_list_get_next(item) ) {
if( 0 == strcmp( ((orte_ras_node_t*) item)->node_name, mapping[i]) ) {
matched = true;
break;
}
}
/* If we find one requested resource that is not allocated,
* then return an error */
if(!matched) {
return false;
}
}
return true;
}
/*
* Check whether the node in question is in the current mapping.
*/
static bool is_mapped(opal_list_item_t *item,
char **mapping,
int num_mapped,
opal_list_t* nodes)
{
int i;
for ( i = 0; i < num_mapped; ++i) {
if ( 0 == strcmp( ((orte_ras_node_t*) item)->node_name, mapping[i])){
return true;
}
}
return false;
}
/*
* Return a pointer to the next allocated node that is included in the mapping.
*/
static opal_list_item_t* get_next_mapped(opal_list_item_t *node_item,
char **mapping,
int num_mapped,
opal_list_t* nodes)
{
opal_list_item_t *item;
/* Wrap around to beginning if we are at the end of the list */
if (opal_list_get_end(nodes) == opal_list_get_next(node_item)) {
item = opal_list_get_first(nodes);
}
else {
item = opal_list_get_next(node_item);
}
do {
/* See if current node is in the mapping and contains slots */
if( is_mapped(item, mapping, num_mapped, nodes) ) {
return item;
}
/*
* We just rechecked the current item and concluded that
* it wasn't in the list, thus the list contains no matches
* in this mapping. Return an error.
*/
if(node_item == item){
return NULL;
}
/* Access next item in Round Robin Manner */
if (opal_list_get_end(nodes) == opal_list_get_next(item)) {
item = opal_list_get_first(nodes);
}
else {
item = opal_list_get_next(item);
}
} while( true );
}
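/* Worked example (editorial, not part of this hunk): with nodes = [n0, n1, n2]
 * and a --host mapping containing only n1, calling get_next_mapped() starting
 * from n2 wraps around to n0, skips it, and returns n1; if no node in the list
 * appears in the mapping, the walk comes back to the starting item and NULL is
 * returned, which the callers below report as an out-of-resource error. */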
static int claim_slot(orte_rmaps_base_map_t *map,
orte_ras_node_t *current_node,
orte_jobid_t jobid, orte_vpid_t vpid, int proc_index)
orte_jobid_t jobid,
orte_vpid_t vpid,
int proc_index)
{
orte_rmaps_base_proc_t *proc;
orte_process_name_t *proc_name;
@ -50,8 +150,9 @@ static int claim_slot(orte_rmaps_base_map_t *map,
if (NULL == rmaps_node) {
return ORTE_ERR_OUT_OF_RESOURCE;
}
rmaps_node->node_name = strdup(current_node->node_name);
proc = OBJ_NEW(orte_rmaps_base_proc_t);
if (NULL == proc) {
OBJ_RELEASE(rmaps_node);
@ -77,9 +178,9 @@ static int claim_slot(orte_rmaps_base_map_t *map,
/* Save this node on the map */
opal_list_append(&map->nodes, &rmaps_node->super);
/* Decrease the number of slots available for allocation
on this node */
--current_node->node_slots_alloc;
/* Be sure to demarcate this slot claim for the node */
current_node->node_slots_inuse++;
return ORTE_SUCCESS;
}
@ -87,11 +188,6 @@ static int claim_slot(orte_rmaps_base_map_t *map,
/*
* Create a default mapping for the application, scheduling round
* robin by node.
*
* NOTE: This function assumes that the allocator has already setup
* the list of nodes such that the sum of the node_slots_alloc fields
* from all entries will be the total number of processes in all the
* apps.
*/
static int map_app_by_node(
orte_app_context_t* app,
@ -99,124 +195,9 @@ static int map_app_by_node(
orte_jobid_t jobid,
orte_vpid_t vpid_start,
int rank,
opal_list_t* nodes)
{
int rc;
size_t num_alloc = 0;
size_t proc_index = 0;
opal_list_item_t *start, *next;
orte_ras_node_t *node;
bool did_alloc;
/* Note that cur_node_item already points to the Right place in
the node list to start looking (i.e., if this is the first time
through, it'll point to the first item. If this is not the
first time through -- i.e., we have multiple app contexts --
it'll point to where we left off last time.). If we're at the
end, bounce back to the front (as would happen in the loop
below)
But do a bozo check to ensure that we don't have a empty node
list. */
if (0 == opal_list_get_size(nodes)) {
return ORTE_ERR_TEMP_OUT_OF_RESOURCE;
} else if (opal_list_get_end(nodes) == cur_node_item) {
cur_node_item = opal_list_get_first(nodes);
}
start = cur_node_item;
/* This loop continues until all procs have been mapped or we run
out of resources. There are two definitions of "run out of
resources":
1. All nodes have node_slots processes mapped to them
2. All nodes have node_slots_max processes mapped to them
We first map until condition #1 is met. If there are still
processes that haven't been mapped yet, then we continue until
condition #2 is met. If we still have processes that haven't
been mapped yet, then it's an "out of resources" error. */
did_alloc = false;
while (num_alloc < app->num_procs) {
node = (orte_ras_node_t*) cur_node_item;
next = opal_list_get_next(cur_node_item);
/* If we have an available slot on this node, claim it */
if (node->node_slots_alloc > 0) {
fflush(stdout);
rc = claim_slot(map, node, jobid, vpid_start + rank, proc_index);
if (ORTE_SUCCESS != rc) {
return rc;
}
if (node->node_slots_alloc == 0) {
opal_list_remove_item(nodes, (opal_list_item_t*)node);
OBJ_RELEASE(node);
}
++rank;
++proc_index;
/* Save the fact that we successfully allocated a process
to a node in this round */
did_alloc = true;
/* Increase the number of procs allocated and see if we're
done */
++num_alloc;
}
/* Move on to the next node */
cur_node_item = next;
if (opal_list_get_end(nodes) == cur_node_item) {
cur_node_item = opal_list_get_first(nodes);
}
/* Are we done? */
if (num_alloc == app->num_procs) {
break;
}
/* Double check that the list is not empty */
if (opal_list_get_end(nodes) == cur_node_item) {
opal_show_help("help-orte-rmaps-rr.txt", "orte-rmaps-rr:alloc-error",
true, num_alloc, app->num_procs);
return ORTE_ERR_TEMP_OUT_OF_RESOURCE;
}
/* If we looped around without allocating any new processes,
then we're full */
if (start == cur_node_item) {
if (!did_alloc) {
opal_show_help("help-orte-rmaps-rr.txt", "orte-rmaps-rr:alloc-error",
true, num_alloc, app->num_procs);
return ORTE_ERR_TEMP_OUT_OF_RESOURCE;
}
}
}
map->num_procs = num_alloc;
return ORTE_SUCCESS;
}
/*
* Create a default mapping for the application, scheduling one round
* robin by slot.
*
* NOTE: This function assumes that the allocator has already setup
* the list of nodes such that the sum of the node_slots_alloc fields
* from all entries will be the total number of processes in all the
* apps.
*/
static int map_app_by_slot(
orte_app_context_t* app,
orte_rmaps_base_map_t* map,
orte_jobid_t jobid,
orte_vpid_t vpid_start,
int rank,
opal_list_t* nodes)
opal_list_t* nodes,
char **mapped_nodes,
int num_mapped_nodes)
{
int rc;
size_t num_alloc = 0;
@ -224,67 +205,179 @@ static int map_app_by_slot(
opal_list_item_t *next;
orte_ras_node_t *node;
/* Note that cur_node_item already points to the Right place in
the node list to start looking (i.e., if this is the first time
through, it'll point to the first item. If this is not the
first time through -- i.e., we have multiple app contexts --
it'll point to where we left off last time.). If we're at the
end, bounce back to the front (as would happen in the loop
below)
But do a bozo check to ensure that we don't have a empty node
list. */
if (0 == opal_list_get_size(nodes)) {
return ORTE_ERR_TEMP_OUT_OF_RESOURCE;
} else if (opal_list_get_end(nodes) == cur_node_item) {
cur_node_item = opal_list_get_first(nodes);
}
/* Go through all nodes and take up to node_slots_alloc slots and
map it to this job */
while (opal_list_get_end(nodes) != cur_node_item &&
num_alloc < app->num_procs) {
/* This loop continues until all procs have been mapped or we run
out of resources. We determine that we have "run out of
resources" when all nodes have node_slots_max processes mapped to them,
thus there are no free slots for a process to be mapped.
If we still have processes that haven't been mapped yet, then it's an
"out of resources" error. */
while (num_alloc < app->num_procs) {
node = (orte_ras_node_t*) cur_node_item;
next = opal_list_get_next(cur_node_item);
/* Find the next node we can use before claiming slots, since
* we may need to prune the nodes list removing overused nodes */
if ( 0 < app->num_map ) {
next = get_next_mapped(cur_node_item, mapped_nodes, num_mapped_nodes, nodes);
if (NULL == next ) {
/* Not allocated anything */
opal_show_help("help-orte-rmaps-rr.txt", "orte-rmaps-rr:no-mapped-node",
true, app->app, opal_argv_join(mapped_nodes, ','));
rc = ORTE_ERR_OUT_OF_RESOURCE;
goto cleanup;
}
}
else {
if (opal_list_get_end(nodes) == opal_list_get_next(cur_node_item)) {
next = opal_list_get_first(nodes);
}
else {
next = opal_list_get_next(cur_node_item);
}
}
/* If we have available slots on this node, claim it */
while (node->node_slots_alloc > 0 &&
num_alloc < app->num_procs) {
fflush(stdout);
/* Remove this node if it has reached its max number of allocatable slots */
if( 0 != node->node_slots_max &&
node->node_slots_inuse >= node->node_slots_max) {
opal_list_remove_item(nodes, (opal_list_item_t*)node);
if(0 >= opal_list_get_size(nodes) ) {
/* No more nodes to allocate :( */
opal_show_help("help-orte-rmaps-rr.txt", "orte-rmaps-rr:alloc-error",
true, num_alloc, app->num_procs);
rc = ORTE_ERR_OUT_OF_RESOURCE;
goto cleanup;
}
}
/* Allocate this node */
else {
rc = claim_slot(map, node, jobid, vpid_start + rank, proc_index);
if (ORTE_SUCCESS != rc) {
return rc;
goto cleanup;
}
++rank;
++proc_index;
/* Increase the number of procs allocated and see if we're
done */
++num_alloc;
}
if (node->node_slots_alloc == 0) {
opal_list_remove_item(nodes, (opal_list_item_t*)node);
OBJ_RELEASE(node);
/* Move on to the next node since we have allocated all of
this node's slots */
cur_node_item = next;
}
}
/* Did we allocate everything? */
if (num_alloc < app->num_procs) {
opal_show_help("help-rmaps-rr.txt", "rmaps-rr:alloc-error",
true, num_alloc, app->num_procs);
return ORTE_ERR_OUT_OF_RESOURCE;
cur_node_item = next;
}
map->num_procs = num_alloc;
return ORTE_SUCCESS;
cleanup:
return rc;
}
/*
* Create a default mapping for the application, scheduling one round
* robin by slot.
*/
static int map_app_by_slot(
orte_app_context_t* app,
orte_rmaps_base_map_t* map,
orte_jobid_t jobid,
orte_vpid_t vpid_start,
int rank,
opal_list_t* nodes,
char **mapped_nodes,
int num_mapped_nodes)
{
int rc = ORTE_SUCCESS;
size_t i;
size_t num_alloc = 0;
size_t proc_index = 0;
orte_ras_node_t *node;
opal_list_item_t *start, *next;
bool oversubscribe;
/* This loop continues until all procs have been mapped or we run
out of resources. We determine that we have "run out of
resources" when all nodes have node_slots_max processes mapped to them,
thus there are no free slots for a process to be mapped.
If we still have processes that haven't been mapped yet, then it's an
"out of resources" error. */
num_alloc = 0;
start = cur_node_item;
oversubscribe = false;
while ( num_alloc < app->num_procs) {
node = (orte_ras_node_t*) cur_node_item;
/* Find the next node we can use before claiming slots, since
* we may need to prune the nodes list, removing overused nodes */
if ( 0 < app->num_map ) {
next = get_next_mapped(cur_node_item, mapped_nodes, num_mapped_nodes, nodes);
if (NULL == next ) {
/* Not allocated anything */
opal_show_help("help-orte-rmaps-rr.txt", "orte-rmaps-rr:no-mapped-node",
true, app->app, opal_argv_join(mapped_nodes, ','));
rc = ORTE_ERR_OUT_OF_RESOURCE;
goto cleanup;
}
}
else {
if (opal_list_get_end(nodes) == opal_list_get_next(cur_node_item)) {
next = opal_list_get_first(nodes);
}
else {
next = opal_list_get_next(cur_node_item);
}
}
/* If we have available slots on this node, claim all of them
* If node_slots == 0, assume 1 slot for that node.
* JJH - is this assumption fully justified? */
for( i = 0; i < ((node->node_slots == 0) ? 1 : node->node_slots); ++i) {
/* If we are not oversubscribing, and this node is full, skip it. */
if( !oversubscribe &&
0 != node->node_slots &&
node->node_slots_inuse > node->node_slots) {
break;
}
/* If this node has reached its max number of slots,
* take it out of the list, and skip it */
else if( 0 != node->node_slots_max &&
node->node_slots_inuse >= node->node_slots_max){
opal_list_remove_item(nodes, (opal_list_item_t*)node);
if( 0 >= opal_list_get_size(nodes) ) {
/* No more nodes to allocate */
opal_show_help("help-orte-rmaps-rr.txt", "orte-rmaps-rr:alloc-error",
true, num_alloc, app->num_procs);
rc = ORTE_ERR_OUT_OF_RESOURCE;
goto cleanup;
}
break;
}
rc = claim_slot(map, node, jobid, vpid_start + rank, proc_index);
if (ORTE_SUCCESS != rc) {
goto cleanup;
}
/* Increase the number of procs allocated */
++num_alloc;
++rank;
++proc_index;
if(num_alloc >= app->num_procs) {
break;
}
}
cur_node_item = next;
/* Since we have now looped back around, go ahead and oversubscribe nodes */
if(start == cur_node_item) {
oversubscribe = true;
}
}
map->num_procs = num_alloc;
cleanup:
return rc;
}
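
A concrete walk-through of the by-slot logic above, assuming no --host mapping is in effect: with two allocated nodes n0 and n1, each with node_slots = 2 and node_slots_max = 0, and an app context asking for 5 processes, the first pass claims both slots on n0 (ranks 0-1) and both slots on n1 (ranks 2-3); the traversal then wraps back to the starting node, oversubscribe flips to true, and rank 4 is placed on n0 again, leaving its node_slots_inuse (3) above node_slots (2).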
@ -294,8 +387,9 @@ static int map_app_by_slot(
static int orte_rmaps_rr_map(orte_jobid_t jobid)
{
orte_app_context_t** context;
size_t i, num_context;
orte_app_context_t** context, *app;
orte_rmaps_base_map_t* map;
size_t i, j, k, num_context;
opal_list_t nodes;
opal_list_t mapping;
opal_list_item_t* item;
@ -304,6 +398,8 @@ static int orte_rmaps_rr_map(orte_jobid_t jobid)
int rank = 0;
int rc = ORTE_SUCCESS;
bool bynode = true;
char **mapped_nodes = NULL;
int num_mapped_nodes = 0;
/* query for the application context and allocated nodes */
if(ORTE_SUCCESS != (rc = orte_rmgr_base_get_app_context(jobid, &context, &num_context))) {
@ -317,9 +413,26 @@ static int orte_rmaps_rr_map(orte_jobid_t jobid)
bynode = false;
}
/* total number of procs required */
/* query for all nodes allocated to this job */
OBJ_CONSTRUCT(&nodes, opal_list_t);
if(ORTE_SUCCESS != (rc = orte_ras_base_node_query_alloc(&nodes, jobid))) {
OBJ_DESTRUCT(&nodes);
return rc;
}
/* Sanity check to make sure we have been allocated nodes */
if (0 == opal_list_get_size(&nodes)) {
OBJ_DESTRUCT(&nodes);
return ORTE_ERR_TEMP_OUT_OF_RESOURCE;
}
/* Total number of procs required
* DEVEL NOTE: Need to extend this when implementing C/N notation
* Will need to set the app->num_procs appropriately before this,
* Since we will have the allocated node information at this point.
*/
for(i=0; i<num_context; i++) {
orte_app_context_t* app = context[i];
app = context[i];
num_procs += app->num_procs;
}
@ -328,19 +441,14 @@ static int orte_rmaps_rr_map(orte_jobid_t jobid)
return rc;
}
/* query for all nodes allocated to this job */
OBJ_CONSTRUCT(&nodes, opal_list_t);
if(ORTE_SUCCESS != (rc = orte_ras_base_node_query_alloc(&nodes, jobid))) {
OBJ_DESTRUCT(&nodes);
return rc;
}
/* construct a default mapping */
/* construct a default mapping by application */
OBJ_CONSTRUCT(&mapping, opal_list_t);
cur_node_item = opal_list_get_first(&nodes);
for(i=0; i<num_context; i++) {
orte_app_context_t* app = context[i];
orte_rmaps_base_map_t* map = OBJ_NEW(orte_rmaps_base_map_t);
app = context[i];
map = OBJ_NEW(orte_rmaps_base_map_t);
if(NULL == map) {
rc = ORTE_ERR_OUT_OF_RESOURCE;
goto cleanup;
@ -353,15 +461,86 @@ static int orte_rmaps_rr_map(orte_jobid_t jobid)
rc = ORTE_ERR_OUT_OF_RESOURCE;
goto cleanup;
}
/* Extract the requested mapping for this application */
/* Note that cur_node_item already points to the Right place in
the node list to start looking (i.e., if this is the first time
through, it'll point to the first item. If this is not the
first time through -- i.e., we have multiple app contexts --
it'll point to where we left off last time.). If we're at the
end, bounce back to the front (as would happen in the loop
below)
But do a bozo check to ensure that we don't have an empty node list. */
if (0 == opal_list_get_size(&nodes)) {
rc = ORTE_ERR_TEMP_OUT_OF_RESOURCE;
goto cleanup;
} else if (opal_list_get_end(&nodes) == cur_node_item) {
cur_node_item = opal_list_get_first(&nodes);
}
/* If this application has a mapping then
* - if the current node is in the mapping, use it
* - otherwise, get the next node in that mapping.
*/
if ( 0 < app->num_map ) {
orte_app_context_map_t** loc_map = app->map_data;
/* Accumulate all of the host name mappings */
for(k = 0; k < app->num_map; ++k) {
if ( ORTE_APP_CONTEXT_MAP_HOSTNAME == loc_map[k]->map_type ) {
if(mapped_nodes == NULL) {
mapped_nodes = opal_argv_split(loc_map[k]->map_data, ',');
num_mapped_nodes = opal_argv_count(mapped_nodes);
}
else { /* Append to the existing mapping */
char ** mini_map = opal_argv_split(loc_map[k]->map_data, ',');
size_t mini_num_map = opal_argv_count(mini_map);
for (j = 0; j < mini_num_map; ++j) {
rc = opal_argv_append(&num_mapped_nodes, &mapped_nodes, mini_map[j]);
if (OPAL_SUCCESS != rc) {
goto cleanup;
}
}
opal_argv_free(mini_map);
}
}
}
if( !are_all_mapped_valid(mapped_nodes, num_mapped_nodes, &nodes) ) {
opal_show_help("help-orte-rmaps-rr.txt", "orte-rmaps-rr:not-all-mapped-alloc",
true, app->app, opal_argv_join(mapped_nodes, ','));
rc = ORTE_ERR_OUT_OF_RESOURCE;
goto cleanup;
}
/* If the current node is not in the current mapping
* Then get the next node that is in the mapping */
if( !is_mapped(cur_node_item, mapped_nodes, num_mapped_nodes, &nodes) ) {
cur_node_item = get_next_mapped(cur_node_item, mapped_nodes, num_mapped_nodes, &nodes);
if( NULL == cur_node_item) {
opal_show_help("help-orte-rmaps-rr.txt", "orte-rmaps-rr:no-mapped-node",
true, app->app, opal_argv_join(mapped_nodes, ','));
rc = ORTE_ERR_OUT_OF_RESOURCE;
goto cleanup;
}
}
}
/* Make assignments */
if (bynode) {
rc = map_app_by_node(app, map, jobid, vpid_start, rank, &nodes);
rc = map_app_by_node(app, map, jobid, vpid_start, rank, &nodes, mapped_nodes, num_mapped_nodes);
} else {
rc = map_app_by_slot(app, map, jobid, vpid_start, rank, &nodes);
rc = map_app_by_slot(app, map, jobid, vpid_start, rank, &nodes, mapped_nodes, num_mapped_nodes);
}
if (ORTE_SUCCESS != rc) {
goto cleanup;
}
rank += app->num_procs;
opal_argv_free(mapped_nodes);
mapped_nodes = NULL;
}
/* save mapping to the registry */
@ -377,10 +556,17 @@ cleanup:
OBJ_RELEASE(item);
}
OBJ_DESTRUCT(&nodes);
while(NULL != (item = opal_list_remove_first(&mapping))) {
OBJ_RELEASE(item);
}
OBJ_DESTRUCT(&mapping);
if( NULL != mapped_nodes ) {
opal_argv_free(mapped_nodes);
}
return rc;
}

View file

@ -80,10 +80,10 @@ static int orte_rmaps_round_robin_open(void)
/* JMS To be changed post-beta to LAM's C/N command line notation */
id = mca_base_param_find("ras_base", NULL, "schedule_policy");
id = mca_base_param_find("rmaps_base", NULL, "schedule_policy");
if (0 > id) {
id = mca_base_param_reg_string_name("ras_base", "schedule_policy",
"Scheduling Policy for RAS. [slot | node]",
id = mca_base_param_reg_string_name("rmaps_base", "schedule_policy",
"Scheduling Policy for RMAPS. [slot | node]",
false, false, "slot",
&mca_rmaps_round_robin_component.schedule_policy);
}

View file

@ -103,7 +103,7 @@ int orte_rmgr_base_put_app_context(
if(ORTE_SUCCESS != rc) {
goto cleanup;
}
rc = orte_rmgr_base_set_job_slots(jobid, job_slots);
rc = orte_rmgr_base_set_job_slots(jobid, job_slots); /* JJH C/N mapping breaks here */
cleanup:
OBJ_RELEASE(value);

View file

@ -123,7 +123,7 @@ static int orte_rmgr_urm_create(
return rc;
}
/* create and initialize job segment */
/* create and initialize job segment */ /* JJH C/N mapping before this */
if (ORTE_SUCCESS !=
(rc = orte_rmgr_base_put_app_context(*jobid, app_context,
num_context))) {
@ -144,9 +144,9 @@ static int orte_rmgr_urm_create(
static int orte_rmgr_urm_allocate(orte_jobid_t jobid)
{
OPAL_TRACE(1);
OPAL_TRACE(1);
return mca_rmgr_urm_component.urm_ras->allocate(jobid);
return orte_ras_base_allocate(jobid, &mca_rmgr_urm_component.urm_ras);
}
static int orte_rmgr_urm_deallocate(orte_jobid_t jobid)
@ -308,7 +308,7 @@ static int orte_rmgr_urm_spawn(
/*
* Initialize job segment and allocate resources
*/
*/ /* JJH Insert C/N mapping stuff here */
if (ORTE_SUCCESS !=
(rc = orte_rmgr_urm_create(app_context,num_context,jobid))) {
ORTE_ERROR_LOG(rc);

View file

@ -176,9 +176,9 @@ static orte_rmgr_base_module_t *orte_rmgr_urm_init(int* priority)
mca_rmgr_urm_component.urm_rds = false;
/**
* Select RAS component
* Find available RAS components
*/
if (NULL == (mca_rmgr_urm_component.urm_ras = orte_ras_base_select(NULL))) {
if (ORTE_SUCCESS != (rc = orte_ras_base_find_available())) {
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
return NULL;
}

View file

@ -40,6 +40,7 @@
#include "mca/sds/base/base.h"
#include "mca/gpr/base/base.h"
#include "mca/ras/base/base.h"
#include "mca/ras/base/ras_base_node.h"
#include "mca/rds/base/base.h"
#include "mca/rmgr/base/base.h"
#include "mca/rmaps/base/base.h"
@ -427,6 +428,7 @@ int orte_init_stage1(bool infrastructure)
orte_rds_cell_desc_t *rds_item;
orte_rds_cell_attr_t *new_attr;
orte_ras_node_t *ras_item;
orte_ras_base_module_t *module;
OBJ_CONSTRUCT(&single_host, opal_list_t);
OBJ_CONSTRUCT(&rds_single_host, opal_list_t);
@ -487,13 +489,28 @@ int orte_init_stage1(bool infrastructure)
goto error;
}
ret = orte_ras.node_insert(&single_host);
/* JMS: This isn't quite right and should be fixed after
1.0 -- we shouldn't be doing this manually here. We
should somehow be invoking a real RAS component to do
this for us. */
ret = orte_ras_base_node_insert(&single_host);
if (ORTE_SUCCESS != ret ) {
ORTE_ERROR_LOG(ret);
error = "orte_ras.node_insert";
goto error;
}
/* JMS: Same as above -- fix this after 1.0: force a
selection so that orte_ras has initialized pointers in
case anywhere else tries to use it. This may end up
putting a bunch more nodes on the node segment (e.g.,
if you're in a SLURM allocation and you "./a.out",
you'll end up with the localhost *and* all the other
nodes in your allocation on the node segment -- which
is probably fine) */
orte_ras_base_allocate(my_jobid, &module);
orte_ras = *module;
OBJ_DESTRUCT(&single_host);
OBJ_DESTRUCT(&rds_single_host);
}

View file

@ -715,11 +715,11 @@ static int parse_globals(int argc, char* argv[])
/* JMS To be changed post-beta to LAM's C/N command line notation */
/* Don't initialize the MCA parameter here unless we have to,
* since it really should be initialized in ras_base_open */
* since it really should be initialized in rmaps_base_open */
if (orterun_globals.by_node || orterun_globals.by_slot) {
char *policy = NULL;
id = mca_base_param_reg_string_name("ras_base", "schedule_policy",
"Scheduling Policy for RAS. [slot | node]",
id = mca_base_param_reg_string_name("rmaps_base", "schedule_policy",
"Scheduling policy for RMAPS. [slot | node]",
false, false, "slot", &policy);
if (orterun_globals.by_node) {
@ -1144,6 +1144,10 @@ static int create_app(int argc, char* argv[], orte_app_context_t **app_ptr,
}
app->map_data[i]->map_type = value[0] - '0';
app->map_data[i]->map_data = strdup(value2);
/* map_data = true;
* JJH - This activates the C/N mapping stuff,
* or at least allows us to pass the 'num_procs' check below.
* since it is not implemented yet, leave commented. */
}
}
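
Tying the two ends together: the host list stored in app->map_data above is what the round-robin mapper earlier in this diff splits apart with the opal_argv helpers. A small, self-contained sketch of those helpers (the host names are made up for illustration):

    #include <stdlib.h>
    #include "opal/util/argv.h"

    static void demo_host_mapping_argv(void)
    {
        /* e.g. the string a user supplied via --host n0,n1,n2 */
        char **mapped_nodes     = opal_argv_split("n0,n1,n2", ',');
        int    num_mapped_nodes = opal_argv_count(mapped_nodes);    /* 3 */
        char  *joined           = opal_argv_join(mapped_nodes, ','); /* "n0,n1,n2" */

        /* mapped_nodes/num_mapped_nodes are what map_app_by_slot() and
         * map_app_by_node() receive; joined is only used in error messages */
        (void) num_mapped_nodes;

        free(joined);
        opal_argv_free(mapped_nodes);
    }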