1
1

Okay, since a certain other RM out there made a fuss about being able to lock their daemons to specified cores, offer the same option here. The MCA param orte_daemon_cores can be used to specify which core(s) you want the orte daemons to use. This will have no bearing on the application procs - unbound will remain unbound, and binding directives will be applied to the apps.

Yippee skippee...

This commit was SVN r30513.
This commit is contained in:
Ralph Castain 2014-01-30 23:50:14 +00:00
parent 4c646ab06b
commit 193cceb483
9 changed files with 142 additions and 14 deletions

View File

@ -1,6 +1,6 @@
/*
* Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2013 Intel, Inc. All rights reserved.
* Copyright (c) 2013-2014 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -250,7 +250,7 @@ OPAL_DECLSPEC int opal_hwloc_print(char **output, char *prefix,
* Make a prettyprint string for a hwloc_cpuset_t (e.g., "socket
* 2[core 3]").
*/
int opal_hwloc_base_cset2str(char *str, int len, hwloc_cpuset_t cpuset);
OPAL_DECLSPEC int opal_hwloc_base_cset2str(char *str, int len, hwloc_cpuset_t cpuset);
/**
* Make a prettyprint string for a cset in a map format.
@ -260,7 +260,10 @@ int opal_hwloc_base_cset2str(char *str, int len, hwloc_cpuset_t cpuset);
* . - signifies PU a process is not bound to
* B - signifies PU a process is bound to
*/
int opal_hwloc_base_cset2mapstr(char *str, int len, hwloc_cpuset_t cpuset);
OPAL_DECLSPEC int opal_hwloc_base_cset2mapstr(char *str, int len, hwloc_cpuset_t cpuset);
/* get the hwloc object that corresponds to the given LOGICAL processor id */
OPAL_DECLSPEC hwloc_obj_t opal_hwloc_base_get_pu(hwloc_topology_t topo, int lid);
#endif

View File

@ -12,7 +12,7 @@
* Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2012-2013 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2013 Intel, Inc. All rights reserved.
* Copyright (c) 2013-2014 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -48,7 +48,7 @@
* only find PUs (!). On such platforms, then do the same calculation
* but with PUs instead of COREs.
*/
static hwloc_obj_t get_pu(hwloc_topology_t topo, int lid)
hwloc_obj_t opal_hwloc_base_get_pu(hwloc_topology_t topo, int lid)
{
hwloc_obj_type_t obj_type = HWLOC_OBJ_CORE;
hwloc_obj_t obj;
@ -130,7 +130,7 @@ int opal_hwloc_base_filter_cpus(hwloc_topology_t topo)
case 1:
/* only one cpu given - get that object */
cpu = strtoul(range[0], NULL, 10);
if (NULL == (pu = get_pu(topo, cpu))) {
if (NULL == (pu = opal_hwloc_base_get_pu(topo, cpu))) {
opal_argv_free(ranges);
opal_argv_free(range);
return OPAL_ERROR;
@ -144,7 +144,7 @@ int opal_hwloc_base_filter_cpus(hwloc_topology_t topo)
start = strtoul(range[0], NULL, 10);
end = strtoul(range[1], NULL, 10);
for (cpu=start; cpu <= end; cpu++) {
if (NULL == (pu = get_pu(topo, cpu))) {
if (NULL == (pu = opal_hwloc_base_get_pu(topo, cpu))) {
opal_argv_free(ranges);
opal_argv_free(range);
hwloc_bitmap_free(avail);
@ -1265,7 +1265,7 @@ int opal_hwloc_base_slot_list_parse(const char *slot_str,
for (j=0; NULL != list[j]; j++) {
core_id = atoi(list[j]);
/* find the specified logical available cpu */
if (NULL == (pu = get_pu(topo, core_id))) {
if (NULL == (pu = opal_hwloc_base_get_pu(topo, core_id))) {
opal_argv_free(range);
opal_argv_free(item);
return OPAL_ERROR;
@ -1283,7 +1283,7 @@ int opal_hwloc_base_slot_list_parse(const char *slot_str,
upper_range = atoi(range[1]);
for (core_id=lower_range; core_id <= upper_range; core_id++) {
/* find the specified logical available cpu */
if (NULL == (pu = get_pu(topo, core_id))) {
if (NULL == (pu = opal_hwloc_base_get_pu(topo, core_id))) {
opal_argv_free(range);
opal_argv_free(item);
return OPAL_ERROR;

View File

@ -128,3 +128,11 @@ Error message received from:
Message:
%s
#
[incorrectly bound]
WARNING: Open MPI incorrectly bound a process to the daemon's cores.
This is a warning only; your job will continue.
Local host: %s
Application name: %s
Location: %s:%d

View File

@ -15,7 +15,7 @@
* Copyright (c) 2010 IBM Corporation. All rights reserved.
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2013 Intel, Inc. All rights reserved
* Copyright (c) 2013-2014 Intel, Inc. All rights reserved
*
* $COPYRIGHT$
*
@ -430,9 +430,32 @@ static int do_child(orte_app_context_t* context,
#if OPAL_HAVE_HWLOC
{
hwloc_cpuset_t cpuset;
hwloc_obj_t root;
opal_hwloc_topo_data_t *sum;
/* Set process affinity, if given */
if (NULL != child->cpu_bitmap) {
if (NULL == child->cpu_bitmap) {
/* if the daemon is bound, then we need to "free" this proc */
if (NULL != orte_daemon_cores) {
root = hwloc_get_root_obj(opal_hwloc_topology);
if (NULL == root->userdata) {
send_warn_show_help(write_fd,
"help-orte-odls-default.txt", "incorrectly bound",
orte_process_info.nodename, context->app,
__FILE__, __LINE__);
}
sum = (opal_hwloc_topo_data_t*)root->userdata;
/* bind this proc to all available processors */
hwloc_set_cpubind(opal_hwloc_topology, sum->available, 0);
}
if (opal_hwloc_report_bindings) {
opal_output(0, "MCW rank %d is not bound (or bound to all available processors)", child->name.vpid);
/* avoid reporting it twice */
(void) mca_base_var_env_name ("hwloc_base_report_bindings", &param);
opal_unsetenv(param, &environ_copy);
free(param);
}
} else {
if (0 == strlen(child->cpu_bitmap)) {
/* this proc is not bound */
if (opal_hwloc_report_bindings) {
@ -442,6 +465,19 @@ static int do_child(orte_app_context_t* context,
opal_unsetenv(param, &environ_copy);
free(param);
}
/* if the daemon is bound, then we need to "free" this proc */
if (NULL != orte_daemon_cores) {
root = hwloc_get_root_obj(opal_hwloc_topology);
if (NULL == root->userdata) {
send_warn_show_help(write_fd,
"help-orte-odls-default.txt", "incorrectly bound",
orte_process_info.nodename, context->app,
__FILE__, __LINE__);
}
sum = (opal_hwloc_topo_data_t*)root->userdata;
/* bind this proc to all available processors */
hwloc_set_cpubind(opal_hwloc_topology, sum->available, 0);
}
/* Set an info MCA param that tells
the launched processes that it was bound by us (e.g., so that
MPI_INIT doesn't try to bind itself) */

View File

@ -10,6 +10,7 @@
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2014 Intel, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
@ -31,3 +32,13 @@ in the environment. Returned value %d instead of ORTE_SUCCESS.
Open RTE was unable to initialize properly. The error occurred while
attempting to %s. Returned value %d instead of ORTE_SUCCESS.
#
[orted:cannot-bind]
A request was made to bind the Open RTE daemons to
a core that does not exist on this node:
node: %s
cores: %s
The MCA param directing this behavior is orte_daemon_cores.
Please correct the request and try again.

View File

@ -15,7 +15,7 @@
* Copyright (c) 2009 Institut National de Recherche en Informatique
* et Automatique. All rights reserved.
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2013 Intel, Inc. All rights reserved.
* Copyright (c) 2013-2014 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -70,6 +70,7 @@
#include "orte/util/session_dir.h"
#include "orte/util/name_fns.h"
#include "orte/util/nidmap.h"
#include "orte/util/parse_options.h"
#include "orte/mca/rml/base/rml_contact.h"
#include "orte/mca/errmgr/errmgr.h"
@ -369,6 +370,59 @@ int orte_daemon(int argc, char *argv[])
*/
opal_finalize_util();
#if OPAL_HAVE_HWLOC
/* bind ourselves if so directed */
if (NULL != orte_daemon_cores) {
char **cores=NULL, tmp[128];
hwloc_obj_t pu;
hwloc_cpuset_t ours, pucpus, res;
int core;
/* could be a collection of comma-delimited ranges, so
* use our handy utility to parse it
*/
orte_util_parse_range_options(orte_daemon_cores, &cores);
if (NULL != cores) {
ours = hwloc_bitmap_alloc();
hwloc_bitmap_zero(ours);
pucpus = hwloc_bitmap_alloc();
res = hwloc_bitmap_alloc();
for (i=0; NULL != cores[i]; i++) {
core = strtoul(cores[i], NULL, 10);
if (NULL == (pu = opal_hwloc_base_get_pu(opal_hwloc_topology, core))) {
/* turn off the show help forwarding as we won't
* be able to cycle the event library to send
*/
orte_show_help_finalize();
/* the message will now come out locally */
orte_show_help("help-orted.txt", "orted:cannot-bind",
true, orte_process_info.nodename,
orte_daemon_cores);
ret = ORTE_ERR_NOT_SUPPORTED;
goto DONE;
}
hwloc_bitmap_and(pucpus, pu->online_cpuset, pu->allowed_cpuset);
hwloc_bitmap_or(res, ours, pucpus);
hwloc_bitmap_copy(ours, res);
}
/* if the result is all zeros, then don't bind */
if (!hwloc_bitmap_iszero(ours)) {
(void)hwloc_set_cpubind(opal_hwloc_topology, ours, 0);
if (opal_hwloc_report_bindings) {
opal_hwloc_base_cset2mapstr(tmp, sizeof(tmp), ours);
opal_output(0, "Daemon %s is bound to cores %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), tmp);
}
}
/* cleanup */
hwloc_bitmap_free(ours);
hwloc_bitmap_free(pucpus);
hwloc_bitmap_free(res);
opal_argv_free(cores);
}
}
#endif
if ((int)ORTE_VPID_INVALID != orted_globals.fail) {
orted_globals.abort=false;
/* some vpid was ordered to fail. The value can be positive
@ -807,6 +861,7 @@ int orte_daemon(int argc, char *argv[])
if (orte_debug_daemons_flag) {
opal_output(0, "%s orted: up and running - waiting for commands!", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
}
ret = ORTE_SUCCESS;
/* loop the event lib until an exit event is detected */
while (orte_event_base_active) {
@ -818,7 +873,7 @@ int orte_daemon(int argc, char *argv[])
DONE:
/* update the exit status, in case it wasn't done */
ORTE_UPDATE_EXIT_STATUS(orte_exit_status);
ORTE_UPDATE_EXIT_STATUS(ret);
/* cleanup and leave */
orte_finalize();

View File

@ -13,7 +13,7 @@
* Copyright (c) 2009-2010 Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011-2013 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2013 Intel, Inc. All rights reserved
* Copyright (c) 2013-2014 Intel, Inc. All rights reserved
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -214,6 +214,7 @@ bool orte_report_silent_errors = false;
param */
bool orte_in_parallel_debugger = false;
char *orte_daemon_cores = NULL;
int orte_dt_init(void)
{

View File

@ -737,6 +737,11 @@ ORTE_DECLSPEC extern opal_byte_object_t orte_pidmap;
/* user debugger */
ORTE_DECLSPEC extern char *orte_base_user_debugger;
/* binding directives for daemons to restrict them
* to certain cores
*/
ORTE_DECLSPEC extern char *orte_daemon_cores;
END_C_DECLS
#endif /* ORTE_RUNTIME_ORTE_GLOBALS_H */

View File

@ -753,5 +753,14 @@ int orte_register_params(void)
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
&orte_soft_locations);
/* allow specification of the cores to be used by daemons */
orte_daemon_cores = NULL;
(void) mca_base_var_register ("orte", "orte", NULL, "daemon_cores",
"Restrict the ORTE daemons (including mpirun) to operate on the specified cores",
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
&orte_daemon_cores);
return ORTE_SUCCESS;
}