diff --git a/orte/mca/rmaps/round_robin/rmaps_rr.h b/orte/mca/rmaps/round_robin/rmaps_rr.h index de9082ffee..cde3383c26 100644 --- a/orte/mca/rmaps/round_robin/rmaps_rr.h +++ b/orte/mca/rmaps/round_robin/rmaps_rr.h @@ -24,27 +24,13 @@ #define ORTE_RMAPS_RR_H #include "orte/mca/rmaps/rmaps.h" -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif +BEGIN_C_DECLS -/** - * RMGR Component - */ -struct orte_rmaps_round_robin_component_t { - orte_rmaps_base_component_t super; - int debug; - int priority; -}; -typedef struct orte_rmaps_round_robin_component_t orte_rmaps_round_robin_component_t; - -ORTE_MODULE_DECLSPEC extern orte_rmaps_round_robin_component_t mca_rmaps_round_robin_component; +ORTE_MODULE_DECLSPEC extern orte_rmaps_base_component_t mca_rmaps_round_robin_component; extern orte_rmaps_base_module_t orte_rmaps_round_robin_module; -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif +END_C_DECLS #endif diff --git a/orte/mca/rmaps/round_robin/rmaps_rr_component.c b/orte/mca/rmaps/round_robin/rmaps_rr_component.c index 26b80ecb7a..fda4b80d30 100644 --- a/orte/mca/rmaps/round_robin/rmaps_rr_component.c +++ b/orte/mca/rmaps/round_robin/rmaps_rr_component.c @@ -35,17 +35,13 @@ static int orte_rmaps_round_robin_close(void); static int orte_rmaps_round_robin_query(mca_base_module_t **module, int *priority); -orte_rmaps_round_robin_component_t mca_rmaps_round_robin_component = { +orte_rmaps_base_component_t mca_rmaps_round_robin_component = { { - /* First, the mca_base_component_t struct containing meta - information about the component itself */ - - { /* Indicate that we are a rmaps v1.3.0 component (which also - implies a specific MCA version) */ - + implies a specific MCA version) */ + ORTE_RMAPS_BASE_VERSION_1_3_0, - + "round_robin", /* MCA component name */ ORTE_MAJOR_VERSION, /* MCA component major version */ ORTE_MINOR_VERSION, /* MCA component minor version */ @@ -53,12 +49,11 @@ orte_rmaps_round_robin_component_t 
mca_rmaps_round_robin_component = { orte_rmaps_round_robin_open, /* component open */ orte_rmaps_round_robin_close, /* component close */ orte_rmaps_round_robin_query /* component query */ - }, - /* Next the MCA v1.0.0 component meta data */ - { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - } + }, + /* Next the MCA v1.0.0 component meta data */ + { + /* The component is checkpoint ready */ + MCA_BASE_METADATA_PARAM_CHECKPOINT } }; @@ -68,16 +63,6 @@ orte_rmaps_round_robin_component_t mca_rmaps_round_robin_component = { */ static int orte_rmaps_round_robin_open(void) { - mca_base_param_reg_int(&mca_rmaps_round_robin_component.super.base_version, "debug", - "Toggle debug output for Round Robin RMAPS component", - false, false, 1, - &mca_rmaps_round_robin_component.debug); - - mca_base_param_reg_int(&mca_rmaps_round_robin_component.super.base_version, "priority", - "Selection priority for Round Robin RMAPS component", - false, false, 1, - &mca_rmaps_round_robin_component.priority); - return ORTE_SUCCESS; } @@ -88,7 +73,7 @@ static int orte_rmaps_round_robin_query(mca_base_module_t **module, int *priorit * so no need to check for that here */ - *priority = mca_rmaps_round_robin_component.priority; + *priority = 70; /* this is the default mapper */ *module = (mca_base_module_t *)&orte_rmaps_round_robin_module; return ORTE_SUCCESS; } diff --git a/orte/mca/rmaps/topo/Makefile.am b/orte/mca/rmaps/topo/Makefile.am new file mode 100644 index 0000000000..46773668dc --- /dev/null +++ b/orte/mca/rmaps/topo/Makefile.am @@ -0,0 +1,45 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. 
+# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +dist_pkgdata_DATA = help-orte-rmaps-topo.txt + +sources = \ + rmaps_topo.c \ + rmaps_topo.h \ + rmaps_topo_component.c + +# Make the output library in this directory, and name it either +# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la +# (for static builds). + +if OMPI_BUILD_rmaps_topo_DSO +component_noinst = +component_install = mca_rmaps_topo.la +else +component_noinst = libmca_rmaps_topo.la +component_install = +endif + +mcacomponentdir = $(pkglibdir) +mcacomponent_LTLIBRARIES = $(component_install) +mca_rmaps_topo_la_SOURCES = $(sources) +mca_rmaps_topo_la_LDFLAGS = -module -avoid-version + +noinst_LTLIBRARIES = $(component_noinst) +libmca_rmaps_topo_la_SOURCES =$(sources) +libmca_rmaps_topo_la_LDFLAGS = -module -avoid-version diff --git a/orte/mca/rmaps/topo/configure.params b/orte/mca/rmaps/topo/configure.params new file mode 100644 index 0000000000..3513f8d956 --- /dev/null +++ b/orte/mca/rmaps/topo/configure.params @@ -0,0 +1,24 @@ +# -*- shell-script -*- +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2007 Los Alamos National Security, LLC. All rights +# reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# Specific to this module + +PARAM_CONFIG_FILES="Makefile" diff --git a/orte/mca/rmaps/topo/help-orte-rmaps-topo.txt b/orte/mca/rmaps/topo/help-orte-rmaps-topo.txt new file mode 100644 index 0000000000..dd660389ac --- /dev/null +++ b/orte/mca/rmaps/topo/help-orte-rmaps-topo.txt @@ -0,0 +1,53 @@ +# -*- text -*- +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# +# This is the US/English help file for Open RTE's topo RMAPS component. +# +[orte-rmaps-topo:alloc-error] +There are not enough slots available in the system to satisfy the %d slots +that were requested by the application: + %s + +Either request fewer slots for your application, or make more slots available +for use. +[orte-rmaps-topo:multi-apps-and-zero-np] +RMAPS found multiple applications to be launched, with +at least one that failed to specify the number of processes to execute. +When specifying multiple applications, you must specify how many processes +of each to launch via the -np argument. + +[orte-rmaps-topo:per-node-and-too-many-procs] +There are not enough nodes in your allocation to satisfy your request to launch +%d processes on a per-node basis - only %d nodes were available. + +Either request fewer processes, or obtain a larger allocation. 
+[orte-rmaps-topo:n-per-node-and-too-many-procs] +There are not enough nodes in your allocation to satisfy your request to launch +%d processes on a %d per-node basis - only %d nodes with a total of %d slots were available. + +Either request fewer processes, or obtain a larger allocation. +[orte-rmaps-topo:n-per-node-and-not-enough-slots] +There are not enough slots on the nodes in your allocation to satisfy your request to launch on a %d process-per-node basis - only %d slots/node were available. + +Either request fewer processes/node, or obtain a larger allocation. + +[orte-rmaps-topo:no-np-and-user-map] +You have specified a rank-to-node/slot mapping, but failed to provide +the number of processes to be executed. For some reason, this information +could not be obtained from the mapping you provided, so we cannot continue +with executing the specified application. diff --git a/orte/mca/rmaps/topo/rmaps_topo.c b/orte/mca/rmaps/topo/rmaps_topo.c new file mode 100644 index 0000000000..c805248710 --- /dev/null +++ b/orte/mca/rmaps/topo/rmaps_topo.c @@ -0,0 +1,548 @@ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "orte_config.h" +#include "orte/constants.h" +#include "orte/types.h" + +#include +#ifdef HAVE_UNISTD_H +#include +#endif /* HAVE_UNISTD_H */ +#ifdef HAVE_STRING_H +#include +#endif /* HAVE_STRING_H */ + +#include "opal/mca/base/mca_base_param.h" +#include "opal/util/output.h" +#include "opal/util/trace.h" +#include "opal/util/show_help.h" +#include "opal/util/argv.h" +#include "opal/mca/carto/base/base.h" + +#include "orte/mca/errmgr/errmgr.h" + +#include "orte/mca/rmaps/base/rmaps_private.h" +#include "orte/mca/rmaps/base/base.h" +#include "rmaps_topo.h" + +static int topo_map(orte_job_t *jdata); + +orte_rmaps_base_module_t orte_rmaps_topo_module = { + topo_map +}; + +/* + * Local variable + */ +static opal_list_item_t *cur_node_item = NULL; +static int ppn = 0; + +/* + * Create a default mapping for the application, scheduling round + * robin by node. + */ +static int map_app_by_node( + orte_app_context_t* app, + orte_job_t* jdata, + orte_vpid_t vpid_start, + opal_list_t* nodes) +{ + int rc = ORTE_SUCCESS; + opal_list_item_t *next; + orte_node_t *node; + orte_std_cntr_t num_alloc=0; + + OPAL_TRACE(2); + + /* This loop continues until all procs have been mapped or we run + out of resources. We determine that we have "run out of + resources" when all nodes have slots_max processes mapped to them, + thus there are no free slots for a process to be mapped, or we have + hit the soft limit on all nodes and are in a "no oversubscribe" state. + If we still have processes that haven't been mapped yet, then it's an + "out of resources" error. + + In this scenario, we rely on the claim_slot function to handle the + oversubscribed case. 
The claim_slot function will leave a node on the + list until it either reaches slots_max OR reaches the + soft limit and the "no_oversubscribe" flag has been set - at which point, + the node will be removed to prevent any more processes from being mapped to + it. Since we are taking one slot from each node as we cycle through, the + list, oversubscription is automatically taken care of via this logic. + */ + + while (num_alloc < app->num_procs) { + + /** see if any nodes remain unused and available. We need to do this check + * each time since we may remove nodes from the list (as they become fully + * used) as we cycle through the loop */ + if(0 >= opal_list_get_size(nodes) ) { + /* No more nodes to allocate :( */ + opal_show_help("help-orte-rmaps-topo.txt", "orte-rmaps-topo:alloc-error", + true, app->num_procs, app->app); + return ORTE_ERR_SILENT; + } + + /* Save the next node we can use before claiming slots, since + * we may need to prune the nodes list removing overused nodes. + * Wrap around to beginning if we are at the end of the list */ + if (opal_list_get_end(nodes) == opal_list_get_next(cur_node_item)) { + next = opal_list_get_first(nodes); + } + else { + next = opal_list_get_next(cur_node_item); + } + + /* Allocate a slot on this node */ + node = (orte_node_t*) cur_node_item; + if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(jdata, node, vpid_start + num_alloc, app->idx, + nodes, jdata->map->oversubscribe, true))) { + /** if the code is ORTE_ERR_NODE_FULLY_USED, then we know this + * really isn't an error - we just need to break from the loop + * since the node is fully used up. For now, just don't report + * an error + */ + if (ORTE_ERR_NODE_FULLY_USED != rc) { + ORTE_ERROR_LOG(rc); + return rc; + } + } + + ++num_alloc; + + cur_node_item = next; + } + + return ORTE_SUCCESS; +} + + +/* + * Create a default mapping for the application, scheduling one round + * robin by slot. 
+ */ +static int map_app_by_slot( + orte_app_context_t* app, + orte_job_t* jdata, + orte_vpid_t vpid_start, + opal_list_t* nodes) +{ + int rc = ORTE_SUCCESS; + orte_std_cntr_t i, num_slots_to_take; + orte_node_t *node; + opal_list_item_t *next; + orte_std_cntr_t num_alloc=0; + + OPAL_TRACE(2); + + /* This loop continues until all procs have been mapped or we run + out of resources. We determine that we have "run out of + resources" when either all nodes have slots_max processes mapped to them, + (thus there are no free slots for a process to be mapped), OR all nodes + have reached their soft limit and the user directed us to "no oversubscribe". + If we still have processes that haven't been mapped yet, then it's an + "out of resources" error. */ + + while ( num_alloc < app->num_procs) { + + /** see if any nodes remain unused and available. We need to do this check + * each time since we may remove nodes from the list (as they become fully + * used) as we cycle through the loop */ + if(0 >= opal_list_get_size(nodes) ) { + /* Everything is at max usage! :( */ + opal_show_help("help-orte-rmaps-topo.txt", "orte-rmaps-topo:alloc-error", + true, app->num_procs, app->app); + return ORTE_ERR_SILENT; + } + + /* Save the next node we can use before claiming slots, since + * we may need to prune the nodes list removing overused nodes. + * Wrap around to beginning if we are at the end of the list */ + if (opal_list_get_end(nodes) == opal_list_get_next(cur_node_item)) { + next = opal_list_get_first(nodes); + } + else { + next = opal_list_get_next(cur_node_item); + } + + /** declare a shorter name for convenience in the code below */ + node = (orte_node_t*) cur_node_item; + + /* If we have available slots on this node, claim all of them + * If node_slots == 0, assume 1 slot for that node. + * JJH - is this assumption fully justified? 
+ * + * If we are now oversubscribing the nodes, then we still take: + * (a) if the node has not been used yet, we take a full node_slots + * (b) if some of the slots are in-use, then we take the number of + * remaining slots before hitting the soft limit (node_slots) + * (c) if we are at or above the soft limit, we take a full node_slots + * + * Note: if node_slots is zero, then we always just take 1 slot + * + * We continue this process until either everything is done, + * or all nodes have hit their hard limit. This algorithm ensures we + * fully utilize each node before oversubscribing, and preserves the ratio + * of processes between the nodes thereafter (e.g., if one node has twice as + * many processes as another before oversubscribing, it will continue + * to do so after oversubscribing). + */ + if (0 == node->slots_inuse || + node->slots_inuse >= node->slots) { + num_slots_to_take = (node->slots == 0) ? 1 : node->slots; + } else { + num_slots_to_take = node->slots - node->slots_inuse; + } + + /* check if we are in npernode mode - if so, then set the num_slots_to_take + * to the num_per_node + */ + if (jdata->map->pernode) { + num_slots_to_take = jdata->map->npernode; + } + + for( i = 0; i < num_slots_to_take; ++i) { + if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(jdata, node, vpid_start + num_alloc, app->idx, + nodes, jdata->map->oversubscribe, true))) { + /** if the code is ORTE_ERR_NODE_FULLY_USED, then we know this + * really isn't an error - we just need to break from the loop + * since the node is fully used up. 
For now, just don't report + * an error + */ + if (ORTE_ERR_NODE_FULLY_USED != rc) { + ORTE_ERROR_LOG(rc); + return rc; + } + } + + /* Update the number of procs allocated */ + ++num_alloc; + + /** if all the procs have been mapped, we return */ + if (num_alloc == app->num_procs) { + return ORTE_SUCCESS; + } + + /* if we have fully used up this node + * OR we are at our ppn and loadbalancing, then break from the loop + */ + if (ORTE_ERR_NODE_FULLY_USED == rc || + (orte_rmaps_base.loadbalance && (int)node->num_procs >= ppn)) { + break; + } + } + + /* we move on to the next node in all cases EXCEPT if we came + * out of the loop without having taken a full bite AND the + * node is NOT max'd out + * + */ + if (i < (num_slots_to_take-1) && + ORTE_ERR_NODE_FULLY_USED != rc && + (orte_rmaps_base.loadbalance && (int)node->num_procs < ppn)) { + continue; + } + cur_node_item = next; + } + + return ORTE_SUCCESS; +} + + +/* + * Create a topo-aware mapping for the job. + */ +static int topo_map(orte_job_t *jdata) +{ + orte_job_map_t *map; + orte_app_context_t *app, **apps; + orte_std_cntr_t i; + opal_list_t node_list; + opal_list_item_t *item; + orte_node_t *node, *nd1; + orte_vpid_t vpid_start; + orte_std_cntr_t num_nodes, num_slots; + int rc; + orte_std_cntr_t slots_per_node; + opal_carto_graph_t *graph; + opal_carto_base_node_t *crnode; + opal_value_array_t distance; + + OPAL_TRACE(1); + + /* conveniece def */ + map = jdata->map; + apps = (orte_app_context_t**)jdata->apps->addr; + + /* start at the beginning... 
*/ + vpid_start = 0; + + /* get the graph of nodes */ + if (ORTE_SUCCESS != (rc = opal_carto_base_get_host_graph(&graph, "SLOT"))) { + ORTE_ERROR_LOG(rc); + return rc; + } + + /* cycle through the app_contexts, mapping them sequentially */ + for(i=0; i < jdata->num_apps; i++) { + app = apps[i]; + + /* if the number of processes wasn't specified, then we know there can be only + * one app_context allowed in the launch, and that we are to launch it across + * all available slots. We'll double-check the single app_context rule first + */ + if (0 == app->num_procs && 1 < jdata->num_apps) { + opal_show_help("help-orte-rmaps-rr.txt", "orte-rmaps-rr:multi-apps-and-zero-np", + true, jdata->num_apps, NULL); + rc = ORTE_ERR_SILENT; + goto error; + } + + /* for each app_context, we have to get the list of nodes that it can + * use since that can now be modified with a hostfile and/or -host + * option + */ + OBJ_CONSTRUCT(&node_list, opal_list_t); + if(ORTE_SUCCESS != (rc = orte_rmaps_base_get_target_nodes(&node_list, &num_slots, app, + map->policy))) { + ORTE_ERROR_LOG(rc); + goto error; + } + num_nodes = (orte_std_cntr_t)opal_list_get_size(&node_list); + + /* if a bookmark exists from some prior mapping, set us to start there */ + if (NULL != jdata->bookmark) { + cur_node_item = NULL; + /* find this node on the list */ + for (item = opal_list_get_first(&node_list); + item != opal_list_get_end(&node_list); + item = opal_list_get_next(item)) { + node = (orte_node_t*)item; + + if (node->index == jdata->bookmark->index) { + cur_node_item = item; + break; + } + } + /* see if we found it - if not, just start at the beginning */ + if (NULL == cur_node_item) { + cur_node_item = opal_list_get_first(&node_list); + } + } else { + /* if no bookmark, then just start at the beginning of the list */ + cur_node_item = opal_list_get_first(&node_list); + } + + /* order this list by network nearness - i.e., the next item in the + * list should be the node that is closest [in a network sense] 
to + * the prior item in the list + * + * RHC: start the list with the bookmark nodeas this is where + * we would start mapping + */ + node = (orte_node_t*)cur_node_item; + if (NULL == (crnode = opal_carto_base_find_node(graph, node->name))) { + ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); + rc = ORTE_ERR_NOT_FOUND; + goto error; + } + OBJ_CONSTRUCT(&distance, opal_value_array_t); + if (ORTE_SUCCESS != (rc = opal_carto_base_get_nodes_distance(graph, crnode, + "SLOT", &distance))) { + ORTE_ERROR_LOG(rc); + goto error; + } + /* cycle through the nodes in the distance array - these + * should be in order based on distance + */ +#if 0 + /* RHC: need to create a working list of nodes that is ordered + * according to distance. The get_nodes_distance function returns + * this, but it covers -all- nodes, so we have to filter that + * against the allocated node list to create the new + * working_node_list + */ + for (i=0; i < distance.size; i++) { + if + } + for (item = opal_list_get_first(&node_list); + item != opal_list_get_end(&node_list); + item = opal_list_get_next(item)) { + node = (orte_node_t*)item; + + if (NULL == (crnode = opal_carto.find_node(graph, node->name))) { + ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); + rc = ORTE_ERR_NOT_FOUND; + goto error; + } + + /* look this node up in the distance array */ +#endif + + /* is this node oversubscribed? */ + node = (orte_node_t*)cur_node_item; + if (node->slots_inuse > node->slots) { + /* work down the list - is there another node that + * would not be oversubscribed? + */ + if (cur_node_item != opal_list_get_end(&node_list)) { + item = opal_list_get_next(cur_node_item); + } else { + item = opal_list_get_first(&node_list); + } + while (item != cur_node_item) { + nd1 = (orte_node_t*)item; + if (nd1->slots_inuse < nd1->slots) { + /* this node is not oversubscribed! use it! 
*/ + cur_node_item = item; + goto proceed; + } + if (item == opal_list_get_end(&node_list)) { + item = opal_list_get_first(&node_list); + } else { + item= opal_list_get_next(item); + } + } + /* if we get here, then we cycled all the way around the + * list without finding a better answer - just use what + * we have + */ + } + + proceed: + if (map->pernode && map->npernode == 1) { + /* there are three use-cases that we need to deal with: + * (a) if -np was not provided, then we just use the number of nodes + * (b) if -np was provided AND #procs > #nodes, then error out + * (c) if -np was provided AND #procs <= #nodes, then launch + * the specified #procs one/node. In this case, we just + * leave app->num_procs alone + */ + if (0 == app->num_procs) { + app->num_procs = num_nodes; + } else if (app->num_procs > num_nodes) { + opal_show_help("help-orte-rmaps-rr.txt", "orte-rmaps-rr:per-node-and-too-many-procs", + true, app->num_procs, num_nodes, NULL); + rc = ORTE_ERR_SILENT; + goto error; + } + } else if (map->pernode && map->npernode > 1) { + /* first, let's check to see if there are enough slots/node to + * meet the request - error out if not + */ + slots_per_node = num_slots / num_nodes; + if (map->npernode > slots_per_node) { + opal_show_help("help-orte-rmaps-rr.txt", "orte-rmaps-rr:n-per-node-and-not-enough-slots", + true, map->npernode, slots_per_node, NULL); + rc = ORTE_ERR_SILENT; + goto error; + } + /* there are three use-cases that we need to deal with: + * (a) if -np was not provided, then we just use the n/node * #nodes + * (b) if -np was provided AND #procs > (n/node * #nodes), then error out + * (c) if -np was provided AND #procs <= (n/node * #nodes), then launch + * the specified #procs n/node. 
In this case, we just + * leave app->num_procs alone + */ + if (0 == app->num_procs) { + /* set the num_procs to equal the specified num/node * the number of nodes */ + app->num_procs = map->npernode * num_nodes; + } else if (app->num_procs > (map->npernode * num_nodes)) { + opal_show_help("help-orte-rmaps-rr.txt", "orte-rmaps-rr:n-per-node-and-too-many-procs", + true, app->num_procs, map->npernode, num_nodes, num_slots, NULL); + rc = ORTE_ERR_SILENT; + goto error; + } + } else if (0 == app->num_procs) { + /** set the num_procs to equal the number of slots on these mapped nodes - if + user has specified "-bynode", then set it to the number of nodes + */ + if (map->policy & ORTE_RMAPS_BYNODE) { + app->num_procs = num_nodes; + } else if (map->policy & ORTE_RMAPS_BYSLOT) { + app->num_procs = num_slots; + } else if (map->policy & ORTE_RMAPS_BYUSER) { + /* we can't handle this - it should have been set when we got + * the map info. If it wasn't, then we can only error out + */ + opal_show_help("help-orte-rmaps-rr.txt", "orte-rmaps-rr:no-np-and-user-map", + true, app->num_procs, map->npernode, num_nodes, num_slots, NULL); + rc = ORTE_ERR_SILENT; + goto error; + } + } + + /** track the total number of processes we mapped */ + jdata->num_procs += app->num_procs; + + /* Make assignments */ + if (map->policy == ORTE_RMAPS_BYUSER) { + rc = ORTE_ERR_NOT_IMPLEMENTED; + goto error; + } else if (map->policy == ORTE_RMAPS_BYNODE) { + rc = map_app_by_node(app, jdata, vpid_start, &node_list); + } else { + rc = map_app_by_slot(app, jdata, vpid_start, &node_list); + } + + /* update the starting vpid for the next app_context */ + vpid_start += app->num_procs; + + if (ORTE_SUCCESS != rc) { + ORTE_ERROR_LOG(rc); + goto error; + } + + /* save the bookmark */ + jdata->bookmark = (orte_node_t*)cur_node_item; + + /* cleanup the node list - it can differ from one app_context + * to another, so we have to get it every time + */ + while(NULL != (item = opal_list_remove_first(&node_list))) { + 
OBJ_RELEASE(item); + } + OBJ_DESTRUCT(&node_list); + } + + /* compute and save convenience values */ + if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_usage(jdata))) { + ORTE_ERROR_LOG(rc); + return rc; + } + + /* define the daemons that we will use for this job */ + if (ORTE_SUCCESS != (rc = orte_rmaps_base_define_daemons(map))) { + ORTE_ERROR_LOG(rc); + return rc; + } + + return ORTE_SUCCESS; + +error: + while(NULL != (item = opal_list_remove_first(&node_list))) { + OBJ_RELEASE(item); + } + OBJ_DESTRUCT(&node_list); + + return rc; +} + + diff --git a/orte/mca/rmaps/topo/rmaps_topo.h b/orte/mca/rmaps/topo/rmaps_topo.h new file mode 100644 index 0000000000..cd19c74acb --- /dev/null +++ b/orte/mca/rmaps/topo/rmaps_topo.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+/**
+ * @file
+ *
+ * Public symbols of the "topo" RMAPS (resource mapping) component
+ */
+#ifndef ORTE_RMAPS_TOPO_H
+#define ORTE_RMAPS_TOPO_H
+
+#include "orte/mca/rmaps/rmaps.h"
+
+BEGIN_C_DECLS
+
+ORTE_MODULE_DECLSPEC extern orte_rmaps_base_component_t mca_rmaps_topo_component;
+extern orte_rmaps_base_module_t orte_rmaps_topo_module;
+
+
+END_C_DECLS
+
+#endif
diff --git a/orte/mca/rmaps/topo/rmaps_topo_component.c b/orte/mca/rmaps/topo/rmaps_topo_component.c
new file mode 100644
index 0000000000..256802ddc1
--- /dev/null
+++ b/orte/mca/rmaps/topo/rmaps_topo_component.c
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation.  All rights reserved.
+ * Copyright (c) 2004-2005 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation.  All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart.  All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "orte_config.h"
+#include "orte/constants.h"
+
+#include "opal/mca/base/base.h"
+#include "opal/mca/base/mca_base_param.h"
+#include "opal/util/output.h"
+
+#include "orte/mca/rmaps/base/rmaps_private.h"
+#include "rmaps_topo.h"
+
+/*
+ * Component entry points, referenced by the component structure below
+ */
+
+static int orte_rmaps_topo_open(void);
+static int orte_rmaps_topo_close(void);
+static int orte_rmaps_topo_query(mca_base_module_t **module, int *priority);
+
+
+orte_rmaps_base_component_t mca_rmaps_topo_component = {
+    {
+        /* Indicate that we are a rmaps v1.3.0 component (which also
+           implies a specific MCA version) */
+
+        ORTE_RMAPS_BASE_VERSION_1_3_0,
+
+        "topo", /* MCA component name */
+        ORTE_MAJOR_VERSION,  /* MCA component major version */
+        ORTE_MINOR_VERSION,  /* MCA component minor version */
+        ORTE_RELEASE_VERSION,  /* MCA component release version */
+        orte_rmaps_topo_open,  /* component open */
+        orte_rmaps_topo_close, /* component close */
+        orte_rmaps_topo_query  /* component query */
+    },
+    /* Next the MCA v1.0.0 component meta data */
+    {
+        /* The component is checkpoint ready */
+        MCA_BASE_METADATA_PARAM_CHECKPOINT
+    }
+};
+
+
+/**
+ * Component open: registers nothing and always returns ORTE_SUCCESS.
+ */
+static int orte_rmaps_topo_open(void)
+{
+    return ORTE_SUCCESS;
+}
+
+
+static int orte_rmaps_topo_query(mca_base_module_t **module, int *priority)
+{
+    /* the RMAPS framework is -only- opened on HNP's,
+     * so no need to check for that here; always offer the topo module
+     */
+
+    *priority = 0;  /* only select if specified */
+    *module = (mca_base_module_t *)&orte_rmaps_topo_module;
+    return ORTE_SUCCESS;
+}
+
+/**
+ * Component close: nothing to release, always returns ORTE_SUCCESS.
+ */
+
+static int orte_rmaps_topo_close(void)
+{
+    return ORTE_SUCCESS;
+}
+
+