1
1

enable ptmalloc with using uGNI

This commit was SVN r25621.
Этот коммит содержится в:
Nathan Hjelm 2011-12-12 20:52:51 +00:00
родитель ad6f0ac59d
Коммит 885d5cbcf8
6 изменённых файлов: 833 добавлений и 1 удалений

Просмотреть файл

@ -791,7 +791,8 @@ static void opal_memory_linux_malloc_init_hook(void)
0 == stat("/dev/myri7", &st) ||
0 == stat("/dev/myri8", &st) ||
0 == stat("/dev/myri9", &st) ||
0 == stat("/dev/ipath", &st)) {
0 == stat("/dev/ipath", &st) ||
0 == stat("/dev/kgni0", &st)) {
found_driver = true;
}

46
orte/mca/rmaps/load_balance/Makefile.am Обычный файл
Просмотреть файл

@ -0,0 +1,46 @@
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
dist_pkgdata_DATA = help-orte-rmaps-lb.txt
sources = \
rmaps_lb.c \
rmaps_lb.h \
rmaps_lb_component.c
# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).
if MCA_BUILD_orte_rmaps_load_balance_DSO
component_noinst =
component_install = mca_rmaps_load_balance.la
else
component_noinst = libmca_rmaps_load_balance.la
component_install =
endif
mcacomponentdir = $(pkglibdir)
mcacomponent_LTLIBRARIES = $(component_install)
mca_rmaps_load_balance_la_SOURCES = $(sources)
mca_rmaps_load_balance_la_LDFLAGS = -module -avoid-version
noinst_LTLIBRARIES = $(component_noinst)
libmca_rmaps_load_balance_la_SOURCES =$(sources)
libmca_rmaps_load_balance_la_LDFLAGS = -module -avoid-version

Просмотреть файл

@ -0,0 +1,53 @@
# -*- text -*-
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# This is the US/English general help file for Open RTE's orterun.
#
[orte-rmaps-rr:alloc-error]
There are not enough slots available in the system to satisfy the %d slots
that were requested by the application:
%s
Either request fewer slots for your application, or make more slots available
for use.
[orte-rmaps-rr:multi-apps-and-zero-np]
RMAPS found multiple applications to be launched, with
at least one that failed to specify the number of processes to execute.
When specifying multiple applications, you must specify how many processes
of each to launch via the -np argument.
[orte-rmaps-rr:per-node-and-too-many-procs]
There are not enough nodes in your allocation to satisfy your request to launch
%d processes on a per-node basis - only %d nodes were available.
Either request fewer processes, or obtain a larger allocation.
[orte-rmaps-rr:n-per-node-and-too-many-procs]
There are not enough nodes in your allocation to satisfy your request to launch
%d processes on a %d per-node basis - only %d nodes with a total of %d slots were available.
Either request fewer processes, or obtain a larger allocation.
[orte-rmaps-rr:n-per-node-and-not-enough-slots]
There are not enough slots on the nodes in your allocation to satisfy your request to launch on a %d process-per-node basis - only %d slots/node were available.
Either request fewer processes/node, or obtain a larger allocation.
[orte-rmaps-rr:no-np-and-user-map]
You have specified a rank-to-node/slot mapping, but failed to provide
the number of processes to be executed. For some reason, this information
could not be obtained from the mapping you provided, so we cannot continue
with executing the specified application.

544
orte/mca/rmaps/load_balance/rmaps_lb.c Обычный файл
Просмотреть файл

@ -0,0 +1,544 @@
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "orte/constants.h"
#include "orte/types.h"
#include <errno.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif /* HAVE_UNISTD_H */
#ifdef HAVE_STRING_H
#include <string.h>
#endif /* HAVE_STRING_H */
#include "opal/mca/base/mca_base_param.h"
#include "opal/util/opal_sos.h"
#include "orte/util/show_help.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/rmaps/base/rmaps_private.h"
#include "orte/mca/rmaps/base/base.h"
#include "rmaps_lb.h"
static int switchyard(orte_job_t *jdata);
orte_rmaps_base_module_t orte_rmaps_load_balance_module = {
switchyard
};
/* Local functions */
static int npernode(orte_job_t *jdata);
static int nperboard(orte_job_t *jdata);
static int npersocket(orte_job_t *jdata);
static int loadbalance(orte_job_t *jdata);
static int switchyard(orte_job_t *jdata)
{
int rc;
mca_base_component_t *c = &mca_rmaps_load_balance_component.super.base_version;
/* only handle initial launch of loadbalanced
* or NPERxxx jobs - allow restarting of failed apps
*/
if (ORTE_JOB_STATE_INIT != jdata->state) {
opal_output_verbose(5, orte_rmaps_base.rmaps_output,
"mca:rmaps:lb: job %s not in initial state - loadbalance cannot map",
ORTE_JOBID_PRINT(jdata->jobid));
return ORTE_ERR_TAKE_NEXT_OPTION;
}
if (NULL != jdata->map->req_mapper &&
0 != strcasecmp(jdata->map->req_mapper, c->mca_component_name)) {
/* a mapper has been specified, and it isn't me */
opal_output_verbose(5, orte_rmaps_base.rmaps_output,
"mca:rmaps:lb: job %s not using loadbalance mapper",
ORTE_JOBID_PRINT(jdata->jobid));
return ORTE_ERR_TAKE_NEXT_OPTION;
}
opal_output_verbose(5, orte_rmaps_base.rmaps_output,
"mca:rmaps:loadbalance: mapping job %s",
ORTE_JOBID_PRINT(jdata->jobid));
/* flag that I did the mapping */
if (NULL != jdata->map->last_mapper) {
free(jdata->map->last_mapper);
}
jdata->map->last_mapper = strdup(c->mca_component_name);
if (0 < mca_rmaps_load_balance_component.npernode ||
0 < jdata->map->npernode) {
rc = npernode(jdata);
} else if (0 < mca_rmaps_load_balance_component.nperboard ||
0 < jdata->map->nperboard) {
rc = nperboard(jdata);
} else if (0 < mca_rmaps_load_balance_component.npersocket ||
0 < jdata->map->npersocket) {
rc = npersocket(jdata);
} else {
rc = loadbalance(jdata);
}
if (ORTE_SUCCESS != rc) {
return rc;
}
/* compute and save local ranks */
if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_local_ranks(jdata))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* define the daemons that we will use for this job */
if (ORTE_SUCCESS != (rc = orte_rmaps_base_define_daemons(jdata))) {
ORTE_ERROR_LOG(rc);
}
return rc;
}
/* place specified #procs on each node, up to the specified total
* number of procs (if one was given).
*/
static int npernode(orte_job_t *jdata)
{
orte_app_context_t *app;
int j, rc=ORTE_SUCCESS;
opal_list_t node_list;
opal_list_item_t *item;
orte_std_cntr_t num_slots;
orte_node_t *node;
int np, nprocs;
int num_nodes;
/* setup the node list */
OBJ_CONSTRUCT(&node_list, opal_list_t);
/* can only have one app_context here */
if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, 0))) {
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
return ORTE_ERR_NOT_FOUND;
}
/* use the number of procs if one was given */
if (0 < app->num_procs) {
np = app->num_procs;
} else {
np = INT_MAX;
}
/* for each app_context, we have to get the list of nodes that it can
* use since that can now be modified with a hostfile and/or -host
* option
*/
if(ORTE_SUCCESS != (rc = orte_rmaps_base_get_target_nodes(&node_list, &num_slots, app,
jdata->map->policy))) {
ORTE_ERROR_LOG(rc);
goto error;
}
/* loop through the list of nodes */
num_nodes = opal_list_get_size(&node_list);
nprocs = 0;
while (NULL != (item = opal_list_remove_first(&node_list))) {
node = (orte_node_t*)item;
/* put the specified number of procs on each node */
for (j=0; j < mca_rmaps_load_balance_component.npernode && nprocs < np; j++) {
if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(jdata, node,
jdata->map->cpus_per_rank, app->idx,
&node_list, jdata->map->oversubscribe,
false, NULL))) {
/** if the code is ORTE_ERR_NODE_FULLY_USED, and we still have
* more procs to place, then that is an error
*/
if (ORTE_ERR_NODE_FULLY_USED != OPAL_SOS_GET_ERROR_CODE(rc) ||
j < mca_rmaps_load_balance_component.npernode-1) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(node);
goto error;
}
}
nprocs++;
}
OBJ_RELEASE(node);
}
/* if the user requested a specific number of procs and
* the total number of procs we were able to assign
* doesn't equal the number requested, then we have a
* problem
*/
if (0 < app->num_procs && nprocs < app->num_procs) {
orte_show_help("help-orte-rmaps-base.txt", "rmaps:too-many-procs", true,
app->app, app->num_procs,
"number of nodes", num_nodes,
"npernode", mca_rmaps_load_balance_component.npernode);
return ORTE_ERR_SILENT;
}
/* update the number of procs in the job */
jdata->num_procs += nprocs;
/* compute vpids and add proc objects to the job - this has to be
* done after each app_context is mapped in order to keep the
* vpids contiguous within an app_context
*/
if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_vpids(jdata))) {
ORTE_ERROR_LOG(rc);
return rc;
}
error:
while (NULL != (item = opal_list_remove_first(&node_list))) {
OBJ_RELEASE(item);
}
OBJ_DESTRUCT(&node_list);
return rc;
}
static int nperboard(orte_job_t *jdata)
{
orte_app_context_t *app;
int j, k, rc=ORTE_SUCCESS;
opal_list_t node_list;
opal_list_item_t *item;
orte_std_cntr_t num_slots;
orte_node_t *node;
int np, nprocs;
int num_boards=orte_default_num_boards;
/* setup the node list */
OBJ_CONSTRUCT(&node_list, opal_list_t);
/* can only have one app_context here */
if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, 0))) {
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
return ORTE_ERR_NOT_FOUND;
}
/* use the number of procs if one was given */
if (0 < app->num_procs) {
np = app->num_procs;
} else {
np = INT_MAX;
}
/* for each app_context, we have to get the list of nodes that it can
* use since that can now be modified with a hostfile and/or -host
* option
*/
if(ORTE_SUCCESS != (rc = orte_rmaps_base_get_target_nodes(&node_list, &num_slots, app,
jdata->map->policy))) {
ORTE_ERROR_LOG(rc);
goto error;
}
/* loop through the list of nodes */
nprocs = 0;
while (NULL != (item = opal_list_remove_first(&node_list))) {
node = (orte_node_t*)item;
num_boards = node->boards;
/* loop through the number of boards in this node */
for (k=0; k < node->boards && nprocs < np; k++) {
/* put the specified number of procs on each board */
for (j=0; j < mca_rmaps_load_balance_component.nperboard && nprocs < np; j++) {
if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(jdata, node,
jdata->map->cpus_per_rank, app->idx,
&node_list, jdata->map->oversubscribe,
false, NULL))) {
/** if the code is ORTE_ERR_NODE_FULLY_USED, and we still have
* more procs to place, then that is an error
*/
if (ORTE_ERR_NODE_FULLY_USED != OPAL_SOS_GET_ERROR_CODE(rc) ||
j < mca_rmaps_load_balance_component.nperboard-1) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(node);
goto error;
}
}
nprocs++;
}
}
OBJ_RELEASE(node);
}
/* if the user requested a specific number of procs and
* the total number of procs we were able to assign
* doesn't equal the number requested, then we have a
* problem
*/
if (0 < app->num_procs && nprocs < app->num_procs) {
orte_show_help("help-orte-rmaps-base.txt", "rmaps:too-many-procs", true,
app->app, app->num_procs,
"number of boards", num_boards,
"nperboard", mca_rmaps_load_balance_component.nperboard);
return ORTE_ERR_SILENT;
}
/* update the number of procs in the job */
jdata->num_procs += nprocs;
/* compute vpids and add proc objects to the job - this has to be
* done after each app_context is mapped in order to keep the
* vpids contiguous within an app_context
*/
if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_vpids(jdata))) {
ORTE_ERROR_LOG(rc);
return rc;
}
error:
while (NULL != (item = opal_list_remove_first(&node_list))) {
OBJ_RELEASE(item);
}
OBJ_DESTRUCT(&node_list);
return rc;
}
static int npersocket(orte_job_t *jdata)
{
orte_app_context_t *app;
int j, k, n, rc=ORTE_SUCCESS;
opal_list_t node_list;
opal_list_item_t *item;
orte_std_cntr_t num_slots;
orte_node_t *node;
int np, nprocs;
int num_sockets=orte_default_num_sockets_per_board;
/* setup the node list */
OBJ_CONSTRUCT(&node_list, opal_list_t);
/* can only have one app_context here */
if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, 0))) {
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
return ORTE_ERR_NOT_FOUND;
}
/* use the number of procs if one was given */
if (0 < app->num_procs) {
np = app->num_procs;
} else {
np = INT_MAX;
}
/* for each app_context, we have to get the list of nodes that it can
* use since that can now be modified with a hostfile and/or -host
* option
*/
if(ORTE_SUCCESS != (rc = orte_rmaps_base_get_target_nodes(&node_list, &num_slots, app,
jdata->map->policy))) {
ORTE_ERROR_LOG(rc);
goto error;
}
/* loop through the list of nodes */
nprocs = 0;
while (NULL != (item = opal_list_remove_first(&node_list))) {
node = (orte_node_t*)item;
num_sockets = node->sockets_per_board;
/* loop through the number of boards in this node */
for (k=0; k < node->boards && nprocs < np; k++) {
/* loop through the number of sockets/board */
for (n=0; n < node->sockets_per_board && nprocs < np; n++) {
/* put the specified number of procs on each socket */
for (j=0; j < mca_rmaps_load_balance_component.npersocket && nprocs < np; j++) {
if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(jdata, node,
jdata->map->cpus_per_rank, app->idx,
&node_list, jdata->map->oversubscribe,
false, NULL))) {
/** if the code is ORTE_ERR_NODE_FULLY_USED, and we still have
* more procs to place, then that is an error
*/
if (ORTE_ERR_NODE_FULLY_USED != OPAL_SOS_GET_ERROR_CODE(rc) ||
j < mca_rmaps_load_balance_component.npersocket-1) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(node);
goto error;
}
}
/* track the number of procs */
nprocs++;
}
}
}
OBJ_RELEASE(node);
}
/* if the user requested a specific number of procs and
* the total number of procs we were able to assign
* doesn't equal the number requested, then we have a
* problem
*/
if (0 < app->num_procs && nprocs < app->num_procs) {
orte_show_help("help-orte-rmaps-base.txt", "rmaps:too-many-procs", true,
app->app, app->num_procs,
"number of sockets", num_sockets,
"npersocket", mca_rmaps_load_balance_component.npersocket);
return ORTE_ERR_SILENT;
}
/* update the number of procs in the job */
jdata->num_procs += nprocs;
/* compute vpids and add proc objects to the job - this has to be
* done after each app_context is mapped in order to keep the
* vpids contiguous within an app_context
*/
if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_vpids(jdata))) {
ORTE_ERROR_LOG(rc);
return rc;
}
error:
while (NULL != (item = opal_list_remove_first(&node_list))) {
OBJ_RELEASE(item);
}
OBJ_DESTRUCT(&node_list);
return rc;
}
/*
* Create a load balanced mapping for the job by assigning a constant #procs/node, with
* leftovers being spread one/node starting from the first node.
*/
static int loadbalance(orte_job_t *jdata)
{
orte_app_context_t *app;
int i, j;
opal_list_t node_list;
orte_std_cntr_t num_nodes, num_slots;
int rc=ORTE_SUCCESS, np, nprocs;
int ppn = 0;
opal_list_item_t *item, *start;
orte_node_t *node;
/* setup */
OBJ_CONSTRUCT(&node_list, opal_list_t);
/* compute total #procs we are going to add and the total number of nodes available */
for(i=0; i < jdata->apps->size; i++) {
if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, i))) {
continue;
}
/* get the nodes and #slots available for this app_context */
if(ORTE_SUCCESS != (rc = orte_rmaps_base_get_target_nodes(&node_list, &num_slots, app,
jdata->map->policy))) {
ORTE_ERROR_LOG(rc);
goto error;
}
if (0 < app->num_procs) {
np = app->num_procs;
} else {
/* set the num_procs to the #slots */
np = num_slots;
}
num_nodes = opal_list_get_size(&node_list);
/* compute the base ppn */
ppn = np / num_nodes;
/* if a bookmark exists from some prior mapping, set us to start there */
start = orte_rmaps_base_get_starting_point(&node_list, jdata);
/* loop through the list of nodes until we either assign all the procs
* or return to the starting point
*/
item = start;
nprocs = 0;
do {
node = (orte_node_t*)item;
/* put the specified number of procs on each node */
for (j=0; j < ppn; j++) {
if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(jdata, node,
jdata->map->cpus_per_rank, app->idx,
&node_list, jdata->map->oversubscribe,
false, NULL))) {
/** if the code is ORTE_ERR_NODE_FULLY_USED, and we still have
* more procs to place, then that is an error
*/
if (ORTE_ERR_NODE_FULLY_USED != OPAL_SOS_GET_ERROR_CODE(rc) ||
j < ppn-1) {
ORTE_ERROR_LOG(rc);
goto error;
}
}
nprocs++;
}
/* move to next node */
if (opal_list_get_end(&node_list) == opal_list_get_next(item)) {
item = opal_list_get_first(&node_list);
}
else {
item = opal_list_get_next(item);
}
} while (item != start && nprocs < np);
/* save the bookmark */
jdata->bookmark = node;
/* if we haven't assigned all the procs, then loop through the list
* again, assigning 1 per node until all are assigned
*/
item = start;
while (nprocs < np) {
node = (orte_node_t*)item;
if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(jdata, node,
jdata->map->cpus_per_rank, app->idx,
&node_list, jdata->map->oversubscribe,
false, NULL))) {
/* if the code is not ORTE_ERR_NODE_FULLY_USED, then that is an error */
if (ORTE_ERR_NODE_FULLY_USED != OPAL_SOS_GET_ERROR_CODE(rc)) {
ORTE_ERROR_LOG(rc);
goto error;
}
}
nprocs++;
/* move to next node */
if (opal_list_get_end(&node_list) == opal_list_get_next(item)) {
item = opal_list_get_first(&node_list);
}
else {
item = opal_list_get_next(item);
}
}
/* save the bookmark */
jdata->bookmark = node;
/* cleanup */
while (NULL != (item = opal_list_remove_first(&node_list))) {
OBJ_RELEASE(item);
}
/* if the user requested a specific number of procs and
* the total number of procs we were able to assign
* doesn't equal the number requested, then we have a
* problem
*/
if (0 < app->num_procs && nprocs < app->num_procs) {
orte_show_help("help-orte-rmaps-base.txt", "rmaps:too-many-procs", true,
app->app, app->num_procs,
"number of slots", nprocs,
"number of nodes", num_nodes);
return ORTE_ERR_SILENT;
}
/* update the number of procs in the job */
jdata->num_procs += nprocs;
/* compute vpids and add proc objects to the job - this has to be
* done after each app_context is mapped in order to keep the
* vpids contiguous within an app_context
*/
if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_vpids(jdata))) {
ORTE_ERROR_LOG(rc);
return rc;
}
}
error:
while(NULL != (item = opal_list_remove_first(&node_list))) {
OBJ_RELEASE(item);
}
OBJ_DESTRUCT(&node_list);
return rc;
}

45
orte/mca/rmaps/load_balance/rmaps_lb.h Обычный файл
Просмотреть файл

@ -0,0 +1,45 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*
* Resource Mapping
*/
#ifndef ORTE_RMAPS_LB_H
#define ORTE_RMAPS_LB_H
#include "orte_config.h"
#include "orte/mca/rmaps/rmaps.h"
BEGIN_C_DECLS
struct orte_rmaps_lb_component_t {
orte_rmaps_base_component_t super;
int npernode;
int nperboard;
int npersocket;
};
typedef struct orte_rmaps_lb_component_t orte_rmaps_lb_component_t;
ORTE_MODULE_DECLSPEC extern orte_rmaps_lb_component_t mca_rmaps_load_balance_component;
extern orte_rmaps_base_module_t orte_rmaps_load_balance_module;
END_C_DECLS
#endif

Просмотреть файл

@ -0,0 +1,143 @@
/*
* Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "orte/constants.h"
#include "opal/mca/base/base.h"
#include "opal/mca/base/mca_base_param.h"
#include "opal/mca/paffinity/paffinity.h"
#include "orte/mca/rmaps/base/base.h"
#include "rmaps_lb.h"
/*
* Local functions
*/
static int orte_rmaps_lb_open(void);
static int orte_rmaps_lb_close(void);
static int orte_rmaps_lb_query(mca_base_module_t **module, int *priority);
static int my_priority;
orte_rmaps_lb_component_t mca_rmaps_load_balance_component = {
{
{
ORTE_RMAPS_BASE_VERSION_2_0_0,
"load_balance", /* MCA component name */
ORTE_MAJOR_VERSION, /* MCA component major version */
ORTE_MINOR_VERSION, /* MCA component minor version */
ORTE_RELEASE_VERSION, /* MCA component release version */
orte_rmaps_lb_open, /* component open */
orte_rmaps_lb_close, /* component close */
orte_rmaps_lb_query /* component query */
},
{
/* The component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
}
}
};
/**
* component open/close/init function
*/
static int orte_rmaps_lb_open(void)
{
mca_base_component_t *c = &mca_rmaps_load_balance_component.super.base_version;
int value, tmp;
/* initialize */
mca_rmaps_load_balance_component.npernode = 0;
mca_rmaps_load_balance_component.nperboard = 0;
mca_rmaps_load_balance_component.npersocket = 0;
mca_base_param_reg_int(c, "priority",
"Priority of the loadbalance rmaps component",
false, false, 80,
&my_priority);
/* check for procs/xxx directives */
tmp = mca_base_param_reg_int(c, "pernode",
"Launch one ppn as directed",
false, false, (int)false, NULL);
mca_base_param_reg_syn_name(tmp, "rmaps", "base_pernode", false);
mca_base_param_lookup_int(tmp, &value);
if (value) {
mca_rmaps_load_balance_component.npernode = 1;
}
/* #procs/node */
tmp = mca_base_param_reg_int(c, "n_pernode",
"Launch n procs/node",
false, false, mca_rmaps_load_balance_component.npernode, NULL);
mca_base_param_reg_syn_name(tmp, "rmaps", "base_n_pernode", false);
mca_base_param_lookup_int(tmp, &mca_rmaps_load_balance_component.npernode);
/* #procs/board */
tmp = mca_base_param_reg_int(c, "n_perboard",
"Launch n procs/board",
false, false, -1, NULL);
mca_base_param_reg_syn_name(tmp, "rmaps", "base_n_perboard", false);
mca_base_param_lookup_int(tmp, &mca_rmaps_load_balance_component.nperboard);
if (0 < mca_rmaps_load_balance_component.nperboard) {
ORTE_ADD_MAPPING_POLICY(ORTE_MAPPING_NPERXXX);
}
/* #procs/socket */
tmp = mca_base_param_reg_int(c, "n_persocket",
"Launch n procs/socket",
false, false, -1, NULL);
mca_base_param_reg_syn_name(tmp, "rmaps", "base_n_persocket", false);
mca_base_param_lookup_int(tmp, &mca_rmaps_load_balance_component.npersocket);
if (0 < mca_rmaps_load_balance_component.npersocket) {
ORTE_ADD_MAPPING_POLICY(ORTE_MAPPING_NPERXXX);
/* force bind to socket if not overridden by user */
ORTE_XSET_BINDING_POLICY(ORTE_BIND_TO_SOCKET);
}
return ORTE_SUCCESS;
}
static int orte_rmaps_lb_query(mca_base_module_t **module, int *priority)
{
/* after rr, unless lb values are set */
if (0 < mca_rmaps_load_balance_component.npernode ||
0 < mca_rmaps_load_balance_component.nperboard ||
0 < mca_rmaps_load_balance_component.npersocket) {
my_priority = 10000;
}
*priority = my_priority;
*module = (mca_base_module_t *)&orte_rmaps_load_balance_module;
return ORTE_SUCCESS;
}
/**
* Close all subsystems.
*/
static int orte_rmaps_lb_close(void)
{
return ORTE_SUCCESS;
}