Okay, since a certain other resource manager (RM) out there made a fuss about being able to lock its daemons to specified cores, offer the same option here. The MCA param orte_daemon_cores can be used to specify which core(s) you want the ORTE daemons to use. This has no bearing on the application procs - unbound procs will remain unbound, and binding directives will still be applied to the apps.
Yippee skippee... This commit was SVN r30513.
This commit is contained in:
parent
4c646ab06b
commit
193cceb483
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2013 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2013-2014 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -250,7 +250,7 @@ OPAL_DECLSPEC int opal_hwloc_print(char **output, char *prefix,
|
||||
* Make a prettyprint string for a hwloc_cpuset_t (e.g., "socket
|
||||
* 2[core 3]").
|
||||
*/
|
||||
int opal_hwloc_base_cset2str(char *str, int len, hwloc_cpuset_t cpuset);
|
||||
OPAL_DECLSPEC int opal_hwloc_base_cset2str(char *str, int len, hwloc_cpuset_t cpuset);
|
||||
|
||||
/**
|
||||
* Make a prettyprint string for a cset in a map format.
|
||||
@ -260,7 +260,10 @@ int opal_hwloc_base_cset2str(char *str, int len, hwloc_cpuset_t cpuset);
|
||||
* . - signifies PU a process not bound to
|
||||
* B - signifies PU a process is bound to
|
||||
*/
|
||||
int opal_hwloc_base_cset2mapstr(char *str, int len, hwloc_cpuset_t cpuset);
|
||||
OPAL_DECLSPEC int opal_hwloc_base_cset2mapstr(char *str, int len, hwloc_cpuset_t cpuset);
|
||||
|
||||
/* get the hwloc object that corresponds to the given LOGICAL processor id */
|
||||
OPAL_DECLSPEC hwloc_obj_t opal_hwloc_base_get_pu(hwloc_topology_t topo, int lid);
|
||||
|
||||
#endif
|
||||
|
||||
|
@ -12,7 +12,7 @@
|
||||
* Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2012-2013 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2013 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2013-2014 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -48,7 +48,7 @@
|
||||
* only find PUs (!). On such platforms, then do the same calculation
|
||||
* but with PUs instead of COREs.
|
||||
*/
|
||||
static hwloc_obj_t get_pu(hwloc_topology_t topo, int lid)
|
||||
hwloc_obj_t opal_hwloc_base_get_pu(hwloc_topology_t topo, int lid)
|
||||
{
|
||||
hwloc_obj_type_t obj_type = HWLOC_OBJ_CORE;
|
||||
hwloc_obj_t obj;
|
||||
@ -130,7 +130,7 @@ int opal_hwloc_base_filter_cpus(hwloc_topology_t topo)
|
||||
case 1:
|
||||
/* only one cpu given - get that object */
|
||||
cpu = strtoul(range[0], NULL, 10);
|
||||
if (NULL == (pu = get_pu(topo, cpu))) {
|
||||
if (NULL == (pu = opal_hwloc_base_get_pu(topo, cpu))) {
|
||||
opal_argv_free(ranges);
|
||||
opal_argv_free(range);
|
||||
return OPAL_ERROR;
|
||||
@ -144,7 +144,7 @@ int opal_hwloc_base_filter_cpus(hwloc_topology_t topo)
|
||||
start = strtoul(range[0], NULL, 10);
|
||||
end = strtoul(range[1], NULL, 10);
|
||||
for (cpu=start; cpu <= end; cpu++) {
|
||||
if (NULL == (pu = get_pu(topo, cpu))) {
|
||||
if (NULL == (pu = opal_hwloc_base_get_pu(topo, cpu))) {
|
||||
opal_argv_free(ranges);
|
||||
opal_argv_free(range);
|
||||
hwloc_bitmap_free(avail);
|
||||
@ -1265,7 +1265,7 @@ int opal_hwloc_base_slot_list_parse(const char *slot_str,
|
||||
for (j=0; NULL != list[j]; j++) {
|
||||
core_id = atoi(list[j]);
|
||||
/* find the specified logical available cpu */
|
||||
if (NULL == (pu = get_pu(topo, core_id))) {
|
||||
if (NULL == (pu = opal_hwloc_base_get_pu(topo, core_id))) {
|
||||
opal_argv_free(range);
|
||||
opal_argv_free(item);
|
||||
return OPAL_ERROR;
|
||||
@ -1283,7 +1283,7 @@ int opal_hwloc_base_slot_list_parse(const char *slot_str,
|
||||
upper_range = atoi(range[1]);
|
||||
for (core_id=lower_range; core_id <= upper_range; core_id++) {
|
||||
/* find the specified logical available cpu */
|
||||
if (NULL == (pu = get_pu(topo, core_id))) {
|
||||
if (NULL == (pu = opal_hwloc_base_get_pu(topo, core_id))) {
|
||||
opal_argv_free(range);
|
||||
opal_argv_free(item);
|
||||
return OPAL_ERROR;
|
||||
|
@ -128,3 +128,11 @@ Error message received from:
|
||||
Message:
|
||||
|
||||
%s
|
||||
#
|
||||
[incorrectly-bound]
|
||||
WARNING: Open MPI incorrectly bound a process to the daemon's cores.
|
||||
This is a warning only; your job will continue.
|
||||
|
||||
Local host: %s
|
||||
Application name: %s
|
||||
Location: %s:%d
|
||||
|
@ -15,7 +15,7 @@
|
||||
* Copyright (c) 2010 IBM Corporation. All rights reserved.
|
||||
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2013 Intel, Inc. All rights reserved
|
||||
* Copyright (c) 2013-2014 Intel, Inc. All rights reserved
|
||||
*
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
@ -430,9 +430,32 @@ static int do_child(orte_app_context_t* context,
|
||||
#if OPAL_HAVE_HWLOC
|
||||
{
|
||||
hwloc_cpuset_t cpuset;
|
||||
hwloc_obj_t root;
|
||||
opal_hwloc_topo_data_t *sum;
|
||||
|
||||
/* Set process affinity, if given */
|
||||
if (NULL != child->cpu_bitmap) {
|
||||
if (NULL == child->cpu_bitmap) {
|
||||
/* if the daemon is bound, then we need to "free" this proc */
|
||||
if (NULL != orte_daemon_cores) {
|
||||
root = hwloc_get_root_obj(opal_hwloc_topology);
|
||||
if (NULL == root->userdata) {
|
||||
send_warn_show_help(write_fd,
|
||||
"help-orte-odls-default.txt", "incorrectly bound",
|
||||
orte_process_info.nodename, context->app,
|
||||
__FILE__, __LINE__);
|
||||
}
|
||||
sum = (opal_hwloc_topo_data_t*)root->userdata;
|
||||
/* bind this proc to all available processors */
|
||||
hwloc_set_cpubind(opal_hwloc_topology, sum->available, 0);
|
||||
}
|
||||
if (opal_hwloc_report_bindings) {
|
||||
opal_output(0, "MCW rank %d is not bound (or bound to all available processors)", child->name.vpid);
|
||||
/* avoid reporting it twice */
|
||||
(void) mca_base_var_env_name ("hwloc_base_report_bindings", &param);
|
||||
opal_unsetenv(param, &environ_copy);
|
||||
free(param);
|
||||
}
|
||||
} else {
|
||||
if (0 == strlen(child->cpu_bitmap)) {
|
||||
/* this proc is not bound */
|
||||
if (opal_hwloc_report_bindings) {
|
||||
@ -442,6 +465,19 @@ static int do_child(orte_app_context_t* context,
|
||||
opal_unsetenv(param, &environ_copy);
|
||||
free(param);
|
||||
}
|
||||
/* if the daemon is bound, then we need to "free" this proc */
|
||||
if (NULL != orte_daemon_cores) {
|
||||
root = hwloc_get_root_obj(opal_hwloc_topology);
|
||||
if (NULL == root->userdata) {
|
||||
send_warn_show_help(write_fd,
|
||||
"help-orte-odls-default.txt", "incorrectly bound",
|
||||
orte_process_info.nodename, context->app,
|
||||
__FILE__, __LINE__);
|
||||
}
|
||||
sum = (opal_hwloc_topo_data_t*)root->userdata;
|
||||
/* bind this proc to all available processors */
|
||||
hwloc_set_cpubind(opal_hwloc_topology, sum->available, 0);
|
||||
}
|
||||
/* Set an info MCA param that tells
|
||||
the launched processes that it was bound by us (e.g., so that
|
||||
MPI_INIT doesn't try to bind itself) */
|
||||
|
@ -10,6 +10,7 @@
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2014 Intel, Inc. All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
@ -31,3 +32,13 @@ in the environment. Returned value %d instead of ORTE_SUCCESS.
|
||||
Open RTE was unable to initialize properly. The error occurred while
|
||||
attempting to %s. Returned value %d instead of ORTE_SUCCESS.
|
||||
|
||||
#
|
||||
[orted:cannot-bind]
|
||||
A request was made to bind the Open RTE daemons to
|
||||
a core that does not exist on this node:
|
||||
|
||||
node: %s
|
||||
cores: %s
|
||||
|
||||
The MCA param directing this behavior is orte_daemon_cores.
|
||||
Please correct the request and try again.
|
||||
|
@ -15,7 +15,7 @@
|
||||
* Copyright (c) 2009 Institut National de Recherche en Informatique
|
||||
* et Automatique. All rights reserved.
|
||||
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2013 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2013-2014 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -70,6 +70,7 @@
|
||||
#include "orte/util/session_dir.h"
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/util/nidmap.h"
|
||||
#include "orte/util/parse_options.h"
|
||||
#include "orte/mca/rml/base/rml_contact.h"
|
||||
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
@ -369,6 +370,59 @@ int orte_daemon(int argc, char *argv[])
|
||||
*/
|
||||
opal_finalize_util();
|
||||
|
||||
#if OPAL_HAVE_HWLOC
|
||||
/* bind ourselves if so directed */
|
||||
if (NULL != orte_daemon_cores) {
|
||||
char **cores=NULL, tmp[128];
|
||||
hwloc_obj_t pu;
|
||||
hwloc_cpuset_t ours, pucpus, res;
|
||||
int core;
|
||||
|
||||
/* could be a collection of comma-delimited ranges, so
|
||||
* use our handy utility to parse it
|
||||
*/
|
||||
orte_util_parse_range_options(orte_daemon_cores, &cores);
|
||||
if (NULL != cores) {
|
||||
ours = hwloc_bitmap_alloc();
|
||||
hwloc_bitmap_zero(ours);
|
||||
pucpus = hwloc_bitmap_alloc();
|
||||
res = hwloc_bitmap_alloc();
|
||||
for (i=0; NULL != cores[i]; i++) {
|
||||
core = strtoul(cores[i], NULL, 10);
|
||||
if (NULL == (pu = opal_hwloc_base_get_pu(opal_hwloc_topology, core))) {
|
||||
/* turn off the show help forwarding as we won't
|
||||
* be able to cycle the event library to send
|
||||
*/
|
||||
orte_show_help_finalize();
|
||||
/* the message will now come out locally */
|
||||
orte_show_help("help-orted.txt", "orted:cannot-bind",
|
||||
true, orte_process_info.nodename,
|
||||
orte_daemon_cores);
|
||||
ret = ORTE_ERR_NOT_SUPPORTED;
|
||||
goto DONE;
|
||||
}
|
||||
hwloc_bitmap_and(pucpus, pu->online_cpuset, pu->allowed_cpuset);
|
||||
hwloc_bitmap_or(res, ours, pucpus);
|
||||
hwloc_bitmap_copy(ours, res);
|
||||
}
|
||||
/* if the result is all zeros, then don't bind */
|
||||
if (!hwloc_bitmap_iszero(ours)) {
|
||||
(void)hwloc_set_cpubind(opal_hwloc_topology, ours, 0);
|
||||
if (opal_hwloc_report_bindings) {
|
||||
opal_hwloc_base_cset2mapstr(tmp, sizeof(tmp), ours);
|
||||
opal_output(0, "Daemon %s is bound to cores %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), tmp);
|
||||
}
|
||||
}
|
||||
/* cleanup */
|
||||
hwloc_bitmap_free(ours);
|
||||
hwloc_bitmap_free(pucpus);
|
||||
hwloc_bitmap_free(res);
|
||||
opal_argv_free(cores);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
if ((int)ORTE_VPID_INVALID != orted_globals.fail) {
|
||||
orted_globals.abort=false;
|
||||
/* some vpid was ordered to fail. The value can be positive
|
||||
@ -807,6 +861,7 @@ int orte_daemon(int argc, char *argv[])
|
||||
if (orte_debug_daemons_flag) {
|
||||
opal_output(0, "%s orted: up and running - waiting for commands!", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
}
|
||||
ret = ORTE_SUCCESS;
|
||||
|
||||
/* loop the event lib until an exit event is detected */
|
||||
while (orte_event_base_active) {
|
||||
@ -818,7 +873,7 @@ int orte_daemon(int argc, char *argv[])
|
||||
|
||||
DONE:
|
||||
/* update the exit status, in case it wasn't done */
|
||||
ORTE_UPDATE_EXIT_STATUS(orte_exit_status);
|
||||
ORTE_UPDATE_EXIT_STATUS(ret);
|
||||
|
||||
/* cleanup and leave */
|
||||
orte_finalize();
|
||||
|
@ -13,7 +13,7 @@
|
||||
* Copyright (c) 2009-2010 Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2011-2013 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2013 Intel, Inc. All rights reserved
|
||||
* Copyright (c) 2013-2014 Intel, Inc. All rights reserved
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -214,6 +214,7 @@ bool orte_report_silent_errors = false;
|
||||
param */
|
||||
bool orte_in_parallel_debugger = false;
|
||||
|
||||
char *orte_daemon_cores = NULL;
|
||||
|
||||
int orte_dt_init(void)
|
||||
{
|
||||
|
@ -737,6 +737,11 @@ ORTE_DECLSPEC extern opal_byte_object_t orte_pidmap;
|
||||
/* user debugger */
|
||||
ORTE_DECLSPEC extern char *orte_base_user_debugger;
|
||||
|
||||
/* binding directives for daemons to restrict them
|
||||
* to certain cores
|
||||
*/
|
||||
ORTE_DECLSPEC extern char *orte_daemon_cores;
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif /* ORTE_RUNTIME_ORTE_GLOBALS_H */
|
||||
|
@ -753,5 +753,14 @@ int orte_register_params(void)
|
||||
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&orte_soft_locations);
|
||||
|
||||
/* allow specification of the cores to be used by daemons */
|
||||
orte_daemon_cores = NULL;
|
||||
(void) mca_base_var_register ("orte", "orte", NULL, "daemon_cores",
|
||||
"Restrict the ORTE daemons (including mpirun) to operate on the specified cores",
|
||||
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
|
||||
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&orte_daemon_cores);
|
||||
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user