diff --git a/opal/mca/memory/linux/hooks.c b/opal/mca/memory/linux/hooks.c index 2be7f79a62..67252fa505 100644 --- a/opal/mca/memory/linux/hooks.c +++ b/opal/mca/memory/linux/hooks.c @@ -791,7 +791,8 @@ static void opal_memory_linux_malloc_init_hook(void) 0 == stat("/dev/myri7", &st) || 0 == stat("/dev/myri8", &st) || 0 == stat("/dev/myri9", &st) || - 0 == stat("/dev/ipath", &st)) { + 0 == stat("/dev/ipath", &st) || + 0 == stat("/dev/kgni0", &st)) { found_driver = true; } diff --git a/orte/mca/rmaps/load_balance/Makefile.am b/orte/mca/rmaps/load_balance/Makefile.am new file mode 100644 index 0000000000..65c6c4271d --- /dev/null +++ b/orte/mca/rmaps/load_balance/Makefile.am @@ -0,0 +1,46 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +dist_pkgdata_DATA = help-orte-rmaps-lb.txt + +sources = \ + rmaps_lb.c \ + rmaps_lb.h \ + rmaps_lb_component.c + +# Make the output library in this directory, and name it either +# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la +# (for static builds). 
+ +if MCA_BUILD_orte_rmaps_load_balance_DSO +component_noinst = +component_install = mca_rmaps_load_balance.la +else +component_noinst = libmca_rmaps_load_balance.la +component_install = +endif + +mcacomponentdir = $(pkglibdir) +mcacomponent_LTLIBRARIES = $(component_install) +mca_rmaps_load_balance_la_SOURCES = $(sources) +mca_rmaps_load_balance_la_LDFLAGS = -module -avoid-version + +noinst_LTLIBRARIES = $(component_noinst) +libmca_rmaps_load_balance_la_SOURCES =$(sources) +libmca_rmaps_load_balance_la_LDFLAGS = -module -avoid-version diff --git a/orte/mca/rmaps/load_balance/help-orte-rmaps-lb.txt b/orte/mca/rmaps/load_balance/help-orte-rmaps-lb.txt new file mode 100644 index 0000000000..2b7941d88a --- /dev/null +++ b/orte/mca/rmaps/load_balance/help-orte-rmaps-lb.txt @@ -0,0 +1,53 @@ +# -*- text -*- +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# +# This is the US/English help file for Open RTE's load_balance RMAPS component. +# +[orte-rmaps-rr:alloc-error] +There are not enough slots available in the system to satisfy the %d slots +that were requested by the application: + %s + +Either request fewer slots for your application, or make more slots available +for use. +[orte-rmaps-rr:multi-apps-and-zero-np] +RMAPS found multiple applications to be launched, with +at least one that failed to specify the number of processes to execute. +When specifying multiple applications, you must specify how many processes +of each to launch via the -np argument. 
+ +[orte-rmaps-rr:per-node-and-too-many-procs] +There are not enough nodes in your allocation to satisfy your request to launch +%d processes on a per-node basis - only %d nodes were available. + +Either request fewer processes, or obtain a larger allocation. +[orte-rmaps-rr:n-per-node-and-too-many-procs] +There are not enough nodes in your allocation to satisfy your request to launch +%d processes on a %d per-node basis - only %d nodes with a total of %d slots were available. + +Either request fewer processes, or obtain a larger allocation. +[orte-rmaps-rr:n-per-node-and-not-enough-slots] +There are not enough slots on the nodes in your allocation to satisfy your request to launch on a %d process-per-node basis - only %d slots/node were available. + +Either request fewer processes/node, or obtain a larger allocation. + +[orte-rmaps-rr:no-np-and-user-map] +You have specified a rank-to-node/slot mapping, but failed to provide +the number of processes to be executed. For some reason, this information +could not be obtained from the mapping you provided, so we cannot continue +with executing the specified application. diff --git a/orte/mca/rmaps/load_balance/rmaps_lb.c b/orte/mca/rmaps/load_balance/rmaps_lb.c new file mode 100644 index 0000000000..226aca8c9a --- /dev/null +++ b/orte/mca/rmaps/load_balance/rmaps_lb.c @@ -0,0 +1,544 @@ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. 
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "orte_config.h"
+#include "orte/constants.h"
+#include "orte/types.h"
+
+#include <errno.h>
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif  /* HAVE_UNISTD_H */
+#ifdef HAVE_STRING_H
+#include <string.h>
+#endif  /* HAVE_STRING_H */
+
+#include "opal/mca/base/mca_base_param.h"
+#include "opal/util/opal_sos.h"
+
+#include "orte/util/show_help.h"
+#include "orte/mca/errmgr/errmgr.h"
+
+#include "orte/mca/rmaps/base/rmaps_private.h"
+#include "orte/mca/rmaps/base/base.h"
+#include "rmaps_lb.h"
+
+static int switchyard(orte_job_t *jdata);
+
+orte_rmaps_base_module_t orte_rmaps_load_balance_module = {
+    switchyard
+};
+
+/* Local functions - each maps jdata and returns ORTE_SUCCESS or an ORTE error code */
+static int npernode(orte_job_t *jdata);
+static int nperboard(orte_job_t *jdata);
+static int npersocket(orte_job_t *jdata);
+static int loadbalance(orte_job_t *jdata);
+
+/* Module entry point: declines with ORTE_ERR_TAKE_NEXT_OPTION unless this is an initial launch
+ * this mapper may handle, else dispatches to one of the mappers below and finishes the map. */
+static int switchyard(orte_job_t *jdata)
+{
+    int rc;
+    mca_base_component_t *c = &mca_rmaps_load_balance_component.super.base_version;
+
+    /* only handle initial launch of loadbalanced
+     * or NPERxxx jobs - allow restarting of failed apps
+     */
+    if (ORTE_JOB_STATE_INIT != jdata->state) {
+        opal_output_verbose(5, orte_rmaps_base.rmaps_output,
+                            "mca:rmaps:lb: job %s not in initial state - loadbalance cannot map",
+                            ORTE_JOBID_PRINT(jdata->jobid));
+        return ORTE_ERR_TAKE_NEXT_OPTION;
+    }
+    if (NULL != jdata->map->req_mapper &&
+        0 != strcasecmp(jdata->map->req_mapper, c->mca_component_name)) {
+        /* a mapper has been specified, and it isn't me */
+        opal_output_verbose(5, orte_rmaps_base.rmaps_output,
+                            "mca:rmaps:lb: job %s not using loadbalance mapper",
+                            ORTE_JOBID_PRINT(jdata->jobid));
+        return ORTE_ERR_TAKE_NEXT_OPTION;
+    }
+
+    opal_output_verbose(5, orte_rmaps_base.rmaps_output,
+                        "mca:rmaps:loadbalance: mapping job %s",
+                        ORTE_JOBID_PRINT(jdata->jobid));
+
+    /* flag that I did the mapping */
+    if (NULL != jdata->map->last_mapper) {
+        free(jdata->map->last_mapper);
+    }
+    jdata->map->last_mapper = strdup(c->mca_component_name);
+
+    if (0 < mca_rmaps_load_balance_component.npernode ||
+        0 < jdata->map->npernode) {
+        rc = npernode(jdata);
+    } else if (0 < mca_rmaps_load_balance_component.nperboard ||
+               0 < jdata->map->nperboard) {
+        rc = nperboard(jdata);
+    } else if (0 < mca_rmaps_load_balance_component.npersocket ||
+               0 < jdata->map->npersocket) {
+        rc = npersocket(jdata);
+    } else {
+        rc = loadbalance(jdata);
+    }
+
+    if (ORTE_SUCCESS != rc) {
+        return rc;
+    }
+
+    /* compute and save local ranks */
+    if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_local_ranks(jdata))) {
+        ORTE_ERROR_LOG(rc);
+        return rc;
+    }
+
+    /* define the daemons that we will use for this job */
+    if (ORTE_SUCCESS != (rc = orte_rmaps_base_define_daemons(jdata))) {
+        ORTE_ERROR_LOG(rc);
+    }
+
+    return rc;
+}
+
+/* place specified #procs on each node, up to the specified total
+ * number of procs (if one was given); only app_context 0 is mapped.
+ * Returns ORTE_SUCCESS or an ORTE error code. */
+static int npernode(orte_job_t *jdata)
+{
+    orte_app_context_t *app;
+    int j, rc=ORTE_SUCCESS;
+    opal_list_t node_list;
+    opal_list_item_t *item;
+    orte_std_cntr_t num_slots;
+    orte_node_t *node;
+    int np, nprocs;
+    int num_nodes;
+
+    /* can only have one app_context here - checked before the list is constructed so this return leaks nothing */
+    if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, 0))) {
+        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
+        return ORTE_ERR_NOT_FOUND;
+    }
+
+    /* setup the node list - every exit below goes through the error label to destruct it */
+    OBJ_CONSTRUCT(&node_list, opal_list_t);
+    /* use the number of procs if one was given */
+    if (0 < app->num_procs) {
+        np = app->num_procs;
+    } else {
+        np = INT_MAX;
+    }
+    /* for each app_context, we have to get the list of nodes that it can
+     * use since that can now be modified with a hostfile and/or -host
+     * option
+     */
+    if(ORTE_SUCCESS != (rc = orte_rmaps_base_get_target_nodes(&node_list, &num_slots, app,
+                                                              jdata->map->policy))) {
+        ORTE_ERROR_LOG(rc);
+        goto error;
+    }
+    /* loop through the list of nodes */
+    num_nodes = opal_list_get_size(&node_list);
+    nprocs = 0;
+    while (NULL != (item = opal_list_remove_first(&node_list))) {
+        node = (orte_node_t*)item;
+        /* put the specified number of procs on each node - NOTE(review): only the component value is used here, although switchyard also dispatches on jdata->map->npernode; confirm intended */
+        for (j=0; j < mca_rmaps_load_balance_component.npernode && nprocs < np; j++) {
+            if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(jdata, node,
+                                                 jdata->map->cpus_per_rank, app->idx,
+                                                 &node_list, jdata->map->oversubscribe,
+                                                 false, NULL))) {
+                /** if the code is ORTE_ERR_NODE_FULLY_USED, and we still have
+                 * more procs to place, then that is an error
+                 */
+                if (ORTE_ERR_NODE_FULLY_USED != OPAL_SOS_GET_ERROR_CODE(rc) ||
+                    j < mca_rmaps_load_balance_component.npernode-1) {
+                    ORTE_ERROR_LOG(rc);
+                    OBJ_RELEASE(node);
+                    goto error;
+                }
+            }
+            nprocs++;
+        }
+        OBJ_RELEASE(node);
+    }
+    /* if the user requested a specific number of procs and
+     * the total number of procs we were able to assign
+     * doesn't equal the number requested, then we have a
+     * problem
+     */
+    if (0 < app->num_procs && nprocs < app->num_procs) {
+        orte_show_help("help-orte-rmaps-base.txt", "rmaps:too-many-procs", true,
+                       app->app, app->num_procs,
+                       "number of nodes", num_nodes,
+                       "npernode", mca_rmaps_load_balance_component.npernode);
+        rc = ORTE_ERR_SILENT;
+        goto error;
+    }
+    /* update the number of procs in the job */
+    jdata->num_procs += nprocs;
+    /* compute vpids and add proc objects to the job - this has to be
+     * done after each app_context is mapped in order to keep the
+     * vpids contiguous within an app_context
+     */
+    if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_vpids(jdata))) {
+        ORTE_ERROR_LOG(rc);
+    }
+
+error: /* common exit, also reached on success: release remaining nodes and return rc */
+    while (NULL != (item = opal_list_remove_first(&node_list))) {
+        OBJ_RELEASE(item);
+    }
+    OBJ_DESTRUCT(&node_list);
+    return rc;
+}
+
+/* as npernode, but places the component's nperboard procs on each board of every node */
+static int nperboard(orte_job_t *jdata)
+{
+    orte_app_context_t *app;
+    int j, k, rc=ORTE_SUCCESS;
+    opal_list_t node_list;
+    opal_list_item_t *item;
+    orte_std_cntr_t num_slots;
+    orte_node_t *node;
+    int np, nprocs;
+    int num_boards=orte_default_num_boards;
+
+    /* can only have one app_context here - checked before the list is constructed so this return leaks nothing */
+    if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, 0))) {
+        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
+        return ORTE_ERR_NOT_FOUND;
+    }
+
+    /* setup the node list - every exit below goes through the error label to destruct it */
+    OBJ_CONSTRUCT(&node_list, opal_list_t);
+    /* use the number of procs if one was given */
+    if (0 < app->num_procs) {
+        np = app->num_procs;
+    } else {
+        np = INT_MAX;
+    }
+    /* for each app_context, we have to get the list of nodes that it can
+     * use since that can now be modified with a hostfile and/or -host
+     * option
+     */
+    if(ORTE_SUCCESS != (rc = orte_rmaps_base_get_target_nodes(&node_list, &num_slots, app,
+                                                              jdata->map->policy))) {
+        ORTE_ERROR_LOG(rc);
+        goto error;
+    }
+    /* loop through the list of nodes */
+    nprocs = 0;
+    while (NULL != (item = opal_list_remove_first(&node_list))) {
+        node = (orte_node_t*)item;
+        num_boards = node->boards;
+        /* loop through the number of boards in this node */
+        for (k=0; k < node->boards && nprocs < np; k++) {
+            /* put the specified number of procs on each board */
+            for (j=0; j < mca_rmaps_load_balance_component.nperboard && nprocs < np; j++) {
+                if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(jdata, node,
+                                                     jdata->map->cpus_per_rank, app->idx,
+                                                     &node_list, jdata->map->oversubscribe,
+                                                     false, NULL))) {
+                    /** if the code is ORTE_ERR_NODE_FULLY_USED, and we still have
+                     * more procs to place, then that is an error
+                     */
+                    if (ORTE_ERR_NODE_FULLY_USED != OPAL_SOS_GET_ERROR_CODE(rc) ||
+                        j < mca_rmaps_load_balance_component.nperboard-1) {
+                        ORTE_ERROR_LOG(rc);
+                        OBJ_RELEASE(node);
+                        goto error;
+                    }
+                }
+                nprocs++;
+            }
+        }
+        OBJ_RELEASE(node);
+    }
+    /* if the user requested a specific number of procs and
+     * the total number of procs we were able to assign
+     * doesn't equal the number requested, then we have a
+     * problem
+     */
+    if (0 < app->num_procs && nprocs < app->num_procs) {
+        orte_show_help("help-orte-rmaps-base.txt", "rmaps:too-many-procs", true,
+                       app->app, app->num_procs,
+                       "number of boards", num_boards,
+                       "nperboard", mca_rmaps_load_balance_component.nperboard);
+        rc = ORTE_ERR_SILENT;
+        goto error;
+    }
+    /* update the number of procs in the job */
+    jdata->num_procs += nprocs;
+    /* compute vpids and add proc objects to the job - this has to be
+     * done after each app_context is mapped in order to keep the
+     * vpids contiguous within an app_context
+     */
+    if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_vpids(jdata))) {
+        ORTE_ERROR_LOG(rc);
+    }
+
+error: /* common exit, also reached on success: release remaining nodes and return rc */
+    while (NULL != (item = opal_list_remove_first(&node_list))) {
+        OBJ_RELEASE(item);
+    }
+    OBJ_DESTRUCT(&node_list);
+    return rc;
+}
+
+/* as npernode, but places the component's npersocket procs on each socket of every board */
+static int npersocket(orte_job_t *jdata)
+{
+    orte_app_context_t *app;
+    int j, k, n, rc=ORTE_SUCCESS;
+    opal_list_t node_list;
+    opal_list_item_t *item;
+    orte_std_cntr_t num_slots;
+    orte_node_t *node;
+    int np, nprocs;
+    int num_sockets=orte_default_num_sockets_per_board;
+
+    /* can only have one app_context here - checked before the list is constructed so this return leaks nothing */
+    if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, 0))) {
+        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
+        return ORTE_ERR_NOT_FOUND;
+    }
+
+    /* setup the node list - every exit below goes through the error label to destruct it */
+    OBJ_CONSTRUCT(&node_list, opal_list_t);
+    /* use the number of procs if one was given */
+    if (0 < app->num_procs) {
+        np = app->num_procs;
+    } else {
+        np = INT_MAX;
+    }
+    /* for each app_context, we have to get the list of nodes that it can
+     * use since that can now be modified with a hostfile and/or -host
+     * option
+     */
+    if(ORTE_SUCCESS != (rc = orte_rmaps_base_get_target_nodes(&node_list, &num_slots, app,
+                                                              jdata->map->policy))) {
+        ORTE_ERROR_LOG(rc);
+        goto error;
+    }
+    /* loop through the list of nodes */
+    nprocs = 0;
+    while (NULL != (item = opal_list_remove_first(&node_list))) {
+        node = (orte_node_t*)item;
+        num_sockets = node->sockets_per_board;
+        /* loop through the number of boards in this node */
+        for (k=0; k < node->boards && nprocs < np; k++) {
+            /* loop through the number of sockets/board */
+            for (n=0; n < node->sockets_per_board && nprocs < np; n++) {
+                /* put the specified number of procs on each socket */
+                for (j=0; j < mca_rmaps_load_balance_component.npersocket && nprocs < np; j++) {
+                    if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(jdata, node,
+                                                         jdata->map->cpus_per_rank, app->idx,
+                                                         &node_list, jdata->map->oversubscribe,
+                                                         false, NULL))) {
+                        /** if the code is ORTE_ERR_NODE_FULLY_USED, and we still have
+                         * more procs to place, then that is an error
+                         */
+                        if (ORTE_ERR_NODE_FULLY_USED != OPAL_SOS_GET_ERROR_CODE(rc) ||
+                            j < mca_rmaps_load_balance_component.npersocket-1) {
+                            ORTE_ERROR_LOG(rc);
+                            OBJ_RELEASE(node);
+                            goto error;
+                        }
+                    }
+                    /* track the number of procs */
+                    nprocs++;
+                }
+            }
+        }
+        OBJ_RELEASE(node);
+    }
+    /* if the user requested a specific number of procs and
+     * the total number of procs we were able to assign
+     * doesn't equal the number requested, then we have a
+     * problem
+     */
+    if (0 < app->num_procs && nprocs < app->num_procs) {
+        orte_show_help("help-orte-rmaps-base.txt", "rmaps:too-many-procs", true,
+                       app->app, app->num_procs,
+                       "number of sockets", num_sockets,
+                       "npersocket", mca_rmaps_load_balance_component.npersocket);
+        rc = ORTE_ERR_SILENT;
+        goto error;
+    }
+    /* update the number of procs in the job */
+    jdata->num_procs += nprocs;
+    /* compute vpids and add proc objects to the job - this has to be
+     * done after each app_context is mapped in order to keep the
+     * vpids contiguous within an app_context
+     */
+    if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_vpids(jdata))) {
+        ORTE_ERROR_LOG(rc);
+    }
+
+error: /* common exit, also reached on success: release remaining nodes and return rc */
+    while (NULL != (item = opal_list_remove_first(&node_list))) {
+        OBJ_RELEASE(item);
+    }
+    OBJ_DESTRUCT(&node_list);
+    return rc;
+}
+
+/*
+ * Create a load balanced mapping for the job by assigning a constant #procs/node, with
+ * leftovers being spread one/node from the starting node. Every non-NULL app_context is mapped in turn.
+ * Returns ORTE_SUCCESS or an ORTE error code. */
+static int loadbalance(orte_job_t *jdata)
+{
+    orte_app_context_t *app;
+    int i, j;
+    opal_list_t node_list;
+    orte_std_cntr_t num_nodes, num_slots;
+    int rc=ORTE_SUCCESS, np, nprocs;
+    int ppn = 0;
+    opal_list_item_t *item, *start;
+    orte_node_t *node;
+
+    /* setup - every exit below goes through the error label to destruct the list */
+    OBJ_CONSTRUCT(&node_list, opal_list_t);
+
+    /* compute total #procs we are going to add and the total number of nodes available */
+    for(i=0; i < jdata->apps->size; i++) {
+        if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, i))) {
+            continue;
+        }
+        /* get the nodes and #slots available for this app_context */
+        if(ORTE_SUCCESS != (rc = orte_rmaps_base_get_target_nodes(&node_list, &num_slots, app,
+                                                                  jdata->map->policy))) {
+            ORTE_ERROR_LOG(rc);
+            goto error;
+        }
+        if (0 < app->num_procs) {
+            np = app->num_procs;
+        } else {
+            /* set the num_procs to the #slots */
+            np = num_slots;
+        }
+        num_nodes = opal_list_get_size(&node_list);
+        /* compute the base ppn - NOTE(review): assumes get_target_nodes never succeeds with an empty list (num_nodes > 0); confirm */
+        ppn = np / num_nodes;
+        /* if a bookmark exists from some prior mapping, set us to start there */
+        start = orte_rmaps_base_get_starting_point(&node_list, jdata);
+        /* loop through the list of nodes until we either assign all the procs
+         * or return to the starting point
+         */
+        item = start;
+        nprocs = 0;
+        do {
+            node = (orte_node_t*)item;
+            /* put the specified number of procs on each node */
+            for (j=0; j < ppn; j++) {
+                if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(jdata, node,
+                                                     jdata->map->cpus_per_rank, app->idx,
+                                                     &node_list, jdata->map->oversubscribe,
+                                                     false, NULL))) {
+                    /** if the code is ORTE_ERR_NODE_FULLY_USED, and we still have
+                     * more procs to place, then that is an error
+                     */
+                    if (ORTE_ERR_NODE_FULLY_USED != OPAL_SOS_GET_ERROR_CODE(rc) ||
+                        j < ppn-1) {
+                        ORTE_ERROR_LOG(rc);
+                        goto error;
+                    }
+                }
+                nprocs++;
+            }
+            /* move to next node, wrapping from the end of the list to its first item */
+            if (opal_list_get_end(&node_list) == opal_list_get_next(item)) {
+                item = opal_list_get_first(&node_list);
+            } else {
+                item = opal_list_get_next(item);
+            }
+        } while (item != start && nprocs < np);
+
+        /* save the bookmark */
+        jdata->bookmark = node;
+
+        /* if we haven't assigned all the procs, then loop through the list
+         * again, assigning 1 per node until all are assigned
+         */
+        item = start;
+        while (nprocs < np) {
+            node = (orte_node_t*)item;
+            if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(jdata, node,
+                                                 jdata->map->cpus_per_rank, app->idx,
+                                                 &node_list, jdata->map->oversubscribe,
+                                                 false, NULL))) {
+                /* if the code is not ORTE_ERR_NODE_FULLY_USED, then that is an error */
+                if (ORTE_ERR_NODE_FULLY_USED != OPAL_SOS_GET_ERROR_CODE(rc)) {
+                    ORTE_ERROR_LOG(rc);
+                    goto error;
+                }
+            }
+            nprocs++;
+            /* move to next node, wrapping from the end of the list to its first item */
+            if (opal_list_get_end(&node_list) == opal_list_get_next(item)) {
+                item = opal_list_get_first(&node_list);
+            } else {
+                item = opal_list_get_next(item);
+            }
+        }
+        /* save the bookmark */
+        jdata->bookmark = node;
+
+        /* cleanup - empties the list so the next app_context starts from a fresh node list */
+        while (NULL != (item = opal_list_remove_first(&node_list))) {
+            OBJ_RELEASE(item);
+        }
+        /* if the user requested a specific number of procs and
+         * the total number of procs we were able to assign
+         * doesn't equal the number requested, then we have a
+         * problem
+         */
+        if (0 < app->num_procs && nprocs < app->num_procs) {
+            orte_show_help("help-orte-rmaps-base.txt", "rmaps:too-many-procs", true,
+                           app->app, app->num_procs,
+                           "number of slots", nprocs,
+                           "number of nodes", num_nodes);
+            rc = ORTE_ERR_SILENT;
+            goto error;
+        }
+        /* update the number of procs in the job */
+        jdata->num_procs += nprocs;
+        /* compute vpids and add proc objects to the job - this has to be
+         * done after each app_context is mapped in order to keep the
+         * vpids contiguous within an app_context
+         */
+        if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_vpids(jdata))) {
+            ORTE_ERROR_LOG(rc);
+            goto error;
+        }
+    }
+
+error: /* common exit, also reached on success: release remaining nodes and return rc */
+    while(NULL != (item = opal_list_remove_first(&node_list))) {
+        OBJ_RELEASE(item);
+    }
+    OBJ_DESTRUCT(&node_list);
+
+    return rc;
+}
+
diff --git
a/orte/mca/rmaps/load_balance/rmaps_lb.h b/orte/mca/rmaps/load_balance/rmaps_lb.h new file mode 100644 index 0000000000..cf998690d3 --- /dev/null +++ b/orte/mca/rmaps/load_balance/rmaps_lb.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +/** + * @file + * + * Resource Mapping - declarations shared by the load_balance rmaps component and its mapping module + */ +#ifndef ORTE_RMAPS_LB_H +#define ORTE_RMAPS_LB_H + +#include "orte_config.h" +#include "orte/mca/rmaps/rmaps.h" + +BEGIN_C_DECLS + +struct orte_rmaps_lb_component_t { /* component descriptor extended with the procs-per-resource settings */ + orte_rmaps_base_component_t super; /* base rmaps component; first member, filled by the leading braces of the component initializer */ + int npernode; /* #procs to place on each node; a value > 0 selects the npernode mapper */ + int nperboard; /* #procs to place on each board; a value > 0 selects the nperboard mapper */ + int npersocket; /* #procs to place on each socket; a value > 0 selects the npersocket mapper */ +}; +typedef struct orte_rmaps_lb_component_t orte_rmaps_lb_component_t; + +ORTE_MODULE_DECLSPEC extern orte_rmaps_lb_component_t mca_rmaps_load_balance_component; /* the single component instance, defined in rmaps_lb_component.c */ +extern orte_rmaps_base_module_t orte_rmaps_load_balance_module; /* mapping module, defined in rmaps_lb.c */ + + +END_C_DECLS + +#endif diff --git a/orte/mca/rmaps/load_balance/rmaps_lb_component.c b/orte/mca/rmaps/load_balance/rmaps_lb_component.c new file mode 100644 index 0000000000..6bfcd8a192 --- /dev/null +++ b/orte/mca/rmaps/load_balance/rmaps_lb_component.c @@ -0,0 +1,143 @@ +/* + * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. 
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "orte_config.h" +#include "orte/constants.h" + +#include "opal/mca/base/base.h" +#include "opal/mca/base/mca_base_param.h" +#include "opal/mca/paffinity/paffinity.h" + +#include "orte/mca/rmaps/base/base.h" +#include "rmaps_lb.h" + +/* + * Local functions + */ + +static int orte_rmaps_lb_open(void); +static int orte_rmaps_lb_close(void); +static int orte_rmaps_lb_query(mca_base_module_t **module, int *priority); + +static int my_priority; /* priority reported by orte_rmaps_lb_query; filled from the "priority" parameter (default 80) in orte_rmaps_lb_open */ + +orte_rmaps_lb_component_t mca_rmaps_load_balance_component = { /* only the base component is initialized here; the nper* fields are set in orte_rmaps_lb_open */ + { + { + ORTE_RMAPS_BASE_VERSION_2_0_0, + + "load_balance", /* MCA component name */ + ORTE_MAJOR_VERSION, /* MCA component major version */ + ORTE_MINOR_VERSION, /* MCA component minor version */ + ORTE_RELEASE_VERSION, /* MCA component release version */ + orte_rmaps_lb_open, /* component open */ + orte_rmaps_lb_close, /* component close */ + orte_rmaps_lb_query /* component query */ + }, + { + /* The component is checkpoint ready */ + MCA_BASE_METADATA_PARAM_CHECKPOINT + } + } +}; + + +/** + * component open function: zeroes the npernode/nperboard/npersocket settings, registers this component's parameters and loads their values into mca_rmaps_load_balance_component; always returns ORTE_SUCCESS + */ +static int orte_rmaps_lb_open(void) +{ + mca_base_component_t *c = &mca_rmaps_load_balance_component.super.base_version; + int value, tmp; + + /* initialize */ + mca_rmaps_load_balance_component.npernode = 0; + mca_rmaps_load_balance_component.nperboard = 0; + mca_rmaps_load_balance_component.npersocket = 0; + + mca_base_param_reg_int(c, "priority", + "Priority of the loadbalance rmaps component", + false, false, 80, + &my_priority); + + /* check for procs/xxx directives - a non-zero "pernode" value presets npernode to 1 */ + tmp = mca_base_param_reg_int(c, "pernode", + "Launch one ppn as directed", + false, false, (int)false, NULL); + mca_base_param_reg_syn_name(tmp, "rmaps", "base_pernode", false); + mca_base_param_lookup_int(tmp, &value); + if (value) { + mca_rmaps_load_balance_component.npernode = 1; + } + + /* #procs/node - registered with the npernode value set so far (0 or 1) as its default, then read back into npernode */ + tmp = mca_base_param_reg_int(c, "n_pernode", + "Launch n procs/node", + false, false, mca_rmaps_load_balance_component.npernode, NULL); + mca_base_param_reg_syn_name(tmp, "rmaps", "base_n_pernode", false); + mca_base_param_lookup_int(tmp, &mca_rmaps_load_balance_component.npernode); + + /* #procs/board - default is -1; a positive value also adds the ORTE_MAPPING_NPERXXX mapping policy */ + tmp = mca_base_param_reg_int(c, "n_perboard", + "Launch n procs/board", + false, false, -1, NULL); + mca_base_param_reg_syn_name(tmp, "rmaps", "base_n_perboard", false); + mca_base_param_lookup_int(tmp, &mca_rmaps_load_balance_component.nperboard); + if (0 < mca_rmaps_load_balance_component.nperboard) { + ORTE_ADD_MAPPING_POLICY(ORTE_MAPPING_NPERXXX); + } + + /* #procs/socket - default is -1; a positive value also adds the ORTE_MAPPING_NPERXXX mapping policy */ + tmp = mca_base_param_reg_int(c, "n_persocket", + "Launch n procs/socket", + false, false, -1, NULL); + mca_base_param_reg_syn_name(tmp, "rmaps", "base_n_persocket", false); + mca_base_param_lookup_int(tmp, &mca_rmaps_load_balance_component.npersocket); + if (0 < mca_rmaps_load_balance_component.npersocket) { + ORTE_ADD_MAPPING_POLICY(ORTE_MAPPING_NPERXXX); + /* force bind to socket if not overridden by user */ + ORTE_XSET_BINDING_POLICY(ORTE_BIND_TO_SOCKET); + } + + return ORTE_SUCCESS; +} + + +static int orte_rmaps_lb_query(mca_base_module_t **module, int *priority) /* reports the mapping module and its priority; always returns ORTE_SUCCESS */ +{ + /* after rr, unless lb values are set - any positive nper* setting replaces the registered priority with 10000 */ + if (0 < mca_rmaps_load_balance_component.npernode || + 0 < mca_rmaps_load_balance_component.nperboard || + 0 < mca_rmaps_load_balance_component.npersocket) { + my_priority = 10000; + } + *priority = my_priority; + *module = (mca_base_module_t *)&orte_rmaps_load_balance_module; + return ORTE_SUCCESS; +} + +/** + * Component close function: there is nothing to release here, so it always returns ORTE_SUCCESS. + */ + +static int orte_rmaps_lb_close(void) +{ + return ORTE_SUCCESS; +} + +