1
1

Okay, since a certain other RM out there made a fuss about being able to lock their daemons to specified cores, offer the same option here. The MCA param orte_daemon_cores can be used to specify which core(s) you want the ORTE daemons to use. This has no bearing on the application procs: unbound procs will remain unbound, and binding directives will still be applied to the apps.

Yippee skippee...

This commit was SVN r30513.
Этот коммит содержится в:
Ralph Castain 2014-01-30 23:50:14 +00:00
родитель 4c646ab06b
Коммит 193cceb483
9 изменённых файлов: 142 добавлений и 14 удалений

Просмотреть файл

@ -1,6 +1,6 @@
/* /*
* Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2013 Intel, Inc. All rights reserved. * Copyright (c) 2013-2014 Intel, Inc. All rights reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
* Additional copyrights may follow * Additional copyrights may follow
@ -250,7 +250,7 @@ OPAL_DECLSPEC int opal_hwloc_print(char **output, char *prefix,
* Make a prettyprint string for a hwloc_cpuset_t (e.g., "socket * Make a prettyprint string for a hwloc_cpuset_t (e.g., "socket
* 2[core 3]"). * 2[core 3]").
*/ */
int opal_hwloc_base_cset2str(char *str, int len, hwloc_cpuset_t cpuset); OPAL_DECLSPEC int opal_hwloc_base_cset2str(char *str, int len, hwloc_cpuset_t cpuset);
/** /**
* Make a prettyprint string for a cset in a map format. * Make a prettyprint string for a cset in a map format.
@ -260,7 +260,10 @@ int opal_hwloc_base_cset2str(char *str, int len, hwloc_cpuset_t cpuset);
* . - signifies PU a process not bound to * . - signifies PU a process not bound to
* B - signifies PU a process is bound to * B - signifies PU a process is bound to
*/ */
int opal_hwloc_base_cset2mapstr(char *str, int len, hwloc_cpuset_t cpuset); OPAL_DECLSPEC int opal_hwloc_base_cset2mapstr(char *str, int len, hwloc_cpuset_t cpuset);
/**
 * Get the hwloc object that corresponds to the given LOGICAL processor id.
 *
 * @param topo  hwloc topology to search
 * @param lid   logical processor id to look up
 *
 * @return the matching hwloc object. Callers in this code base check for
 *         a NULL return and treat it as "no such processor".
 *
 * NOTE(review): per the comments on the implementation, the lookup is done
 * over COREs and falls back to PUs on platforms where hwloc only finds
 * PUs -- confirm against the definition before relying on this.
 */
OPAL_DECLSPEC hwloc_obj_t opal_hwloc_base_get_pu(hwloc_topology_t topo, int lid);
#endif #endif

Просмотреть файл

@ -12,7 +12,7 @@
* Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011-2012 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2012-2013 Los Alamos National Security, LLC. * Copyright (c) 2012-2013 Los Alamos National Security, LLC.
* All rights reserved. * All rights reserved.
* Copyright (c) 2013 Intel, Inc. All rights reserved. * Copyright (c) 2013-2014 Intel, Inc. All rights reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
* Additional copyrights may follow * Additional copyrights may follow
@ -48,7 +48,7 @@
* only find PUs (!). On such platforms, then do the same calculation * only find PUs (!). On such platforms, then do the same calculation
* but with PUs instead of COREs. * but with PUs instead of COREs.
*/ */
static hwloc_obj_t get_pu(hwloc_topology_t topo, int lid) hwloc_obj_t opal_hwloc_base_get_pu(hwloc_topology_t topo, int lid)
{ {
hwloc_obj_type_t obj_type = HWLOC_OBJ_CORE; hwloc_obj_type_t obj_type = HWLOC_OBJ_CORE;
hwloc_obj_t obj; hwloc_obj_t obj;
@ -130,7 +130,7 @@ int opal_hwloc_base_filter_cpus(hwloc_topology_t topo)
case 1: case 1:
/* only one cpu given - get that object */ /* only one cpu given - get that object */
cpu = strtoul(range[0], NULL, 10); cpu = strtoul(range[0], NULL, 10);
if (NULL == (pu = get_pu(topo, cpu))) { if (NULL == (pu = opal_hwloc_base_get_pu(topo, cpu))) {
opal_argv_free(ranges); opal_argv_free(ranges);
opal_argv_free(range); opal_argv_free(range);
return OPAL_ERROR; return OPAL_ERROR;
@ -144,7 +144,7 @@ int opal_hwloc_base_filter_cpus(hwloc_topology_t topo)
start = strtoul(range[0], NULL, 10); start = strtoul(range[0], NULL, 10);
end = strtoul(range[1], NULL, 10); end = strtoul(range[1], NULL, 10);
for (cpu=start; cpu <= end; cpu++) { for (cpu=start; cpu <= end; cpu++) {
if (NULL == (pu = get_pu(topo, cpu))) { if (NULL == (pu = opal_hwloc_base_get_pu(topo, cpu))) {
opal_argv_free(ranges); opal_argv_free(ranges);
opal_argv_free(range); opal_argv_free(range);
hwloc_bitmap_free(avail); hwloc_bitmap_free(avail);
@ -1265,7 +1265,7 @@ int opal_hwloc_base_slot_list_parse(const char *slot_str,
for (j=0; NULL != list[j]; j++) { for (j=0; NULL != list[j]; j++) {
core_id = atoi(list[j]); core_id = atoi(list[j]);
/* find the specified logical available cpu */ /* find the specified logical available cpu */
if (NULL == (pu = get_pu(topo, core_id))) { if (NULL == (pu = opal_hwloc_base_get_pu(topo, core_id))) {
opal_argv_free(range); opal_argv_free(range);
opal_argv_free(item); opal_argv_free(item);
return OPAL_ERROR; return OPAL_ERROR;
@ -1283,7 +1283,7 @@ int opal_hwloc_base_slot_list_parse(const char *slot_str,
upper_range = atoi(range[1]); upper_range = atoi(range[1]);
for (core_id=lower_range; core_id <= upper_range; core_id++) { for (core_id=lower_range; core_id <= upper_range; core_id++) {
/* find the specified logical available cpu */ /* find the specified logical available cpu */
if (NULL == (pu = get_pu(topo, core_id))) { if (NULL == (pu = opal_hwloc_base_get_pu(topo, core_id))) {
opal_argv_free(range); opal_argv_free(range);
opal_argv_free(item); opal_argv_free(item);
return OPAL_ERROR; return OPAL_ERROR;

Просмотреть файл

@ -128,3 +128,11 @@ Error message received from:
Message: Message:
%s %s
#
[incorrectly-bound]
WARNING: Open MPI incorrectly bound a process to the daemon's cores.
This is a warning only; your job will continue.
Local host: %s
Application name: %s
Location: %s:%d

Просмотреть файл

@ -15,7 +15,7 @@
* Copyright (c) 2010 IBM Corporation. All rights reserved. * Copyright (c) 2010 IBM Corporation. All rights reserved.
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights * Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
* reserved. * reserved.
* Copyright (c) 2013 Intel, Inc. All rights reserved * Copyright (c) 2013-2014 Intel, Inc. All rights reserved
* *
* $COPYRIGHT$ * $COPYRIGHT$
* *
@ -430,9 +430,32 @@ static int do_child(orte_app_context_t* context,
#if OPAL_HAVE_HWLOC #if OPAL_HAVE_HWLOC
{ {
hwloc_cpuset_t cpuset; hwloc_cpuset_t cpuset;
hwloc_obj_t root;
opal_hwloc_topo_data_t *sum;
/* Set process affinity, if given */ /* Set process affinity, if given */
if (NULL != child->cpu_bitmap) { if (NULL == child->cpu_bitmap) {
/* if the daemon is bound, then we need to "free" this proc */
if (NULL != orte_daemon_cores) {
root = hwloc_get_root_obj(opal_hwloc_topology);
if (NULL == root->userdata) {
send_warn_show_help(write_fd,
"help-orte-odls-default.txt", "incorrectly bound",
orte_process_info.nodename, context->app,
__FILE__, __LINE__);
}
sum = (opal_hwloc_topo_data_t*)root->userdata;
/* bind this proc to all available processors */
hwloc_set_cpubind(opal_hwloc_topology, sum->available, 0);
}
if (opal_hwloc_report_bindings) {
opal_output(0, "MCW rank %d is not bound (or bound to all available processors)", child->name.vpid);
/* avoid reporting it twice */
(void) mca_base_var_env_name ("hwloc_base_report_bindings", &param);
opal_unsetenv(param, &environ_copy);
free(param);
}
} else {
if (0 == strlen(child->cpu_bitmap)) { if (0 == strlen(child->cpu_bitmap)) {
/* this proc is not bound */ /* this proc is not bound */
if (opal_hwloc_report_bindings) { if (opal_hwloc_report_bindings) {
@ -442,6 +465,19 @@ static int do_child(orte_app_context_t* context,
opal_unsetenv(param, &environ_copy); opal_unsetenv(param, &environ_copy);
free(param); free(param);
} }
/* if the daemon is bound, then we need to "free" this proc */
if (NULL != orte_daemon_cores) {
root = hwloc_get_root_obj(opal_hwloc_topology);
if (NULL == root->userdata) {
send_warn_show_help(write_fd,
"help-orte-odls-default.txt", "incorrectly bound",
orte_process_info.nodename, context->app,
__FILE__, __LINE__);
}
sum = (opal_hwloc_topo_data_t*)root->userdata;
/* bind this proc to all available processors */
hwloc_set_cpubind(opal_hwloc_topology, sum->available, 0);
}
/* Set an info MCA param that tells /* Set an info MCA param that tells
the launched processes that it was bound by us (e.g., so that the launched processes that it was bound by us (e.g., so that
MPI_INIT doesn't try to bind itself) */ MPI_INIT doesn't try to bind itself) */

Просмотреть файл

@ -10,6 +10,7 @@
# University of Stuttgart. All rights reserved. # University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California. # Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved. # All rights reserved.
# Copyright (c) 2014 Intel, Inc. All rights reserved.
# $COPYRIGHT$ # $COPYRIGHT$
# #
# Additional copyrights may follow # Additional copyrights may follow
@ -31,3 +32,13 @@ in the environment. Returned value %d instead of ORTE_SUCCESS.
Open RTE was unable to initialize properly. The error occurred while Open RTE was unable to initialize properly. The error occurred while
attempting to %s. Returned value %d instead of ORTE_SUCCESS. attempting to %s. Returned value %d instead of ORTE_SUCCESS.
#
[orted:cannot-bind]
A request was made to bind the Open RTE daemons to
a core that does not exist on this node:
node: %s
cores: %s
The MCA param directing this behavior is orte_daemon_cores.
Please correct the request and try again.

Просмотреть файл

@ -15,7 +15,7 @@
* Copyright (c) 2009 Institut National de Recherche en Informatique * Copyright (c) 2009 Institut National de Recherche en Informatique
* et Automatique. All rights reserved. * et Automatique. All rights reserved.
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2013 Intel, Inc. All rights reserved. * Copyright (c) 2013-2014 Intel, Inc. All rights reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
* Additional copyrights may follow * Additional copyrights may follow
@ -70,6 +70,7 @@
#include "orte/util/session_dir.h" #include "orte/util/session_dir.h"
#include "orte/util/name_fns.h" #include "orte/util/name_fns.h"
#include "orte/util/nidmap.h" #include "orte/util/nidmap.h"
#include "orte/util/parse_options.h"
#include "orte/mca/rml/base/rml_contact.h" #include "orte/mca/rml/base/rml_contact.h"
#include "orte/mca/errmgr/errmgr.h" #include "orte/mca/errmgr/errmgr.h"
@ -369,6 +370,59 @@ int orte_daemon(int argc, char *argv[])
*/ */
opal_finalize_util(); opal_finalize_util();
#if OPAL_HAVE_HWLOC
/* bind ourselves if so directed */
if (NULL != orte_daemon_cores) {
char **cores=NULL, tmp[128];
hwloc_obj_t pu;
hwloc_cpuset_t ours, pucpus, res;
int core;
/* could be a collection of comma-delimited ranges, so
* use our handy utility to parse it
*/
orte_util_parse_range_options(orte_daemon_cores, &cores);
if (NULL != cores) {
ours = hwloc_bitmap_alloc();
hwloc_bitmap_zero(ours);
pucpus = hwloc_bitmap_alloc();
res = hwloc_bitmap_alloc();
for (i=0; NULL != cores[i]; i++) {
core = strtoul(cores[i], NULL, 10);
if (NULL == (pu = opal_hwloc_base_get_pu(opal_hwloc_topology, core))) {
/* turn off the show help forwarding as we won't
* be able to cycle the event library to send
*/
orte_show_help_finalize();
/* the message will now come out locally */
orte_show_help("help-orted.txt", "orted:cannot-bind",
true, orte_process_info.nodename,
orte_daemon_cores);
ret = ORTE_ERR_NOT_SUPPORTED;
goto DONE;
}
hwloc_bitmap_and(pucpus, pu->online_cpuset, pu->allowed_cpuset);
hwloc_bitmap_or(res, ours, pucpus);
hwloc_bitmap_copy(ours, res);
}
/* if the result is all zeros, then don't bind */
if (!hwloc_bitmap_iszero(ours)) {
(void)hwloc_set_cpubind(opal_hwloc_topology, ours, 0);
if (opal_hwloc_report_bindings) {
opal_hwloc_base_cset2mapstr(tmp, sizeof(tmp), ours);
opal_output(0, "Daemon %s is bound to cores %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), tmp);
}
}
/* cleanup */
hwloc_bitmap_free(ours);
hwloc_bitmap_free(pucpus);
hwloc_bitmap_free(res);
opal_argv_free(cores);
}
}
#endif
if ((int)ORTE_VPID_INVALID != orted_globals.fail) { if ((int)ORTE_VPID_INVALID != orted_globals.fail) {
orted_globals.abort=false; orted_globals.abort=false;
/* some vpid was ordered to fail. The value can be positive /* some vpid was ordered to fail. The value can be positive
@ -807,6 +861,7 @@ int orte_daemon(int argc, char *argv[])
if (orte_debug_daemons_flag) { if (orte_debug_daemons_flag) {
opal_output(0, "%s orted: up and running - waiting for commands!", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); opal_output(0, "%s orted: up and running - waiting for commands!", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
} }
ret = ORTE_SUCCESS;
/* loop the event lib until an exit event is detected */ /* loop the event lib until an exit event is detected */
while (orte_event_base_active) { while (orte_event_base_active) {
@ -818,7 +873,7 @@ int orte_daemon(int argc, char *argv[])
DONE: DONE:
/* update the exit status, in case it wasn't done */ /* update the exit status, in case it wasn't done */
ORTE_UPDATE_EXIT_STATUS(orte_exit_status); ORTE_UPDATE_EXIT_STATUS(ret);
/* cleanup and leave */ /* cleanup and leave */
orte_finalize(); orte_finalize();

Просмотреть файл

@ -13,7 +13,7 @@
* Copyright (c) 2009-2010 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2009-2010 Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. * Copyright (c) 2011-2013 Los Alamos National Security, LLC.
* All rights reserved. * All rights reserved.
* Copyright (c) 2013 Intel, Inc. All rights reserved * Copyright (c) 2013-2014 Intel, Inc. All rights reserved
* $COPYRIGHT$ * $COPYRIGHT$
* *
* Additional copyrights may follow * Additional copyrights may follow
@ -214,6 +214,7 @@ bool orte_report_silent_errors = false;
param */ param */
bool orte_in_parallel_debugger = false; bool orte_in_parallel_debugger = false;
/* core(s) the ORTE daemons should be restricted to, as given by the
 * orte_daemon_cores MCA param; NULL means no daemon binding was requested
 */
char *orte_daemon_cores = NULL;
int orte_dt_init(void) int orte_dt_init(void)
{ {

Просмотреть файл

@ -737,6 +737,11 @@ ORTE_DECLSPEC extern opal_byte_object_t orte_pidmap;
/* user debugger */ /* user debugger */
ORTE_DECLSPEC extern char *orte_base_user_debugger; ORTE_DECLSPEC extern char *orte_base_user_debugger;
/* binding directives for daemons to restrict them
* to certain cores
*/
ORTE_DECLSPEC extern char *orte_daemon_cores;
END_C_DECLS END_C_DECLS
#endif /* ORTE_RUNTIME_ORTE_GLOBALS_H */ #endif /* ORTE_RUNTIME_ORTE_GLOBALS_H */

Просмотреть файл

@ -753,5 +753,14 @@ int orte_register_params(void)
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
&orte_soft_locations); &orte_soft_locations);
/* allow specification of the cores to be used by daemons */
orte_daemon_cores = NULL;
(void) mca_base_var_register ("orte", "orte", NULL, "daemon_cores",
"Restrict the ORTE daemons (including mpirun) to operate on the specified cores",
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
&orte_daemon_cores);
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }