From 7138fd131f1584bf343a2fa67cfe060276728dfd Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Sat, 19 Sep 2009 17:43:21 +0000 Subject: [PATCH] Final cleanup on new paffinity "if-avail" messages, plus fix one bug reported by Terry This commit was SVN r21978. --- orte/mca/odls/default/help-odls-default.txt | 11 ++++ orte/mca/odls/default/odls_default_module.c | 61 +++++++++------------ orte/runtime/orte_mca_params.c | 8 +-- 3 files changed, 40 insertions(+), 40 deletions(-) diff --git a/orte/mca/odls/default/help-odls-default.txt b/orte/mca/odls/default/help-odls-default.txt index 5ecde83141..6b14712529 100644 --- a/orte/mca/odls/default/help-odls-default.txt +++ b/orte/mca/odls/default/help-odls-default.txt @@ -107,3 +107,14 @@ use "bind to socket" and other related functionality. Local host: %s Action attempted: %s %s Application name: %s +# +[odls-default:binding-not-avail] +A request to bind the processes if the operating system supports +such an operation was made, but the OS does not support this operation: + + Action requested: %s + +Because the request was made on an "if-available" basis, the job was +launched without taking the requested action. If this is not the desired +behavior, talk to your local system administrator to find out if your +system can support the requested action. 
diff --git a/orte/mca/odls/default/odls_default_module.c b/orte/mca/odls/default/odls_default_module.c index 771da776f4..f3c9e830ef 100644 --- a/orte/mca/odls/default/odls_default_module.c +++ b/orte/mca/odls/default/odls_default_module.c @@ -109,6 +109,19 @@ orte_odls_base_module_t orte_odls_default_module = { exit(1); \ } while(0); +/* convenience macro for checking binding requirements: if the job's policy says binding is not required, optionally report the skipped action (n) via the "odls-default:binding-not-avail" help topic when report_bindings is set, then jump to LAUNCH_PROCS; the expanding scope must provide jobdat and the LAUNCH_PROCS label, and the caller supplies the trailing semicolon */ +#define ORTE_ODLS_IF_BIND_NOT_REQD(n) \ + do { \ + if (ORTE_BINDING_NOT_REQUIRED(jobdat->policy)) { \ + if (orte_odls_globals.report_bindings) { \ + orte_show_help("help-odls-default.txt", \ + "odls-default:binding-not-avail", \ + true, (n)); \ + } \ + goto LAUNCH_PROCS; \ + } \ + } while(0) + static bool odls_default_child_died(pid_t pid, unsigned int timeout, int *exit_status) { time_t end; @@ -362,9 +375,7 @@ static int odls_default_fork_local_proc(orte_app_context_t* context, } /* if we don't have enough processors, that is an error */ if (ncpu < logical_cpu) { - if (ORTE_BINDING_NOT_REQUIRED(jobdat->policy)) { - goto LAUNCH_PROCS; - } + ORTE_ODLS_IF_BIND_NOT_REQD("bind-to-core"); orte_show_help("help-odls-default.txt", "odls-default:not-enough-processors", true); ORTE_ODLS_ERROR_OUT(ORTE_ERR_FATAL); @@ -376,9 +387,7 @@ static int odls_default_fork_local_proc(orte_app_context_t* context, */ phys_cpu = opal_paffinity_base_get_physical_processor_id(logical_cpu); if (0 > phys_cpu) { - if (ORTE_BINDING_NOT_REQUIRED(jobdat->policy)) { - goto LAUNCH_PROCS; - } + ORTE_ODLS_IF_BIND_NOT_REQD("bind-to-core"); orte_show_help("help-odls-default.txt", "odls-default:invalid-phys-cpu", true); ORTE_ODLS_ERROR_OUT(ORTE_ERR_FATAL); @@ -393,9 +402,7 @@ static int odls_default_fork_local_proc(orte_app_context_t* context, ORTE_NAME_PRINT(child->name), mask.bitmask[0]); } if (ORTE_SUCCESS != (rc = opal_paffinity_base_set(mask))) { - if (ORTE_BINDING_NOT_REQUIRED(jobdat->policy)) { - goto LAUNCH_PROCS; - } + ORTE_ODLS_IF_BIND_NOT_REQD("bind-to-core"); orte_show_help("help-odls-default.txt", 
"odls-default:failed-set-paff", true); ORTE_ODLS_ERROR_OUT(rc); @@ -438,9 +445,7 @@ static int odls_default_fork_local_proc(orte_app_context_t* context, } /* if we don't have enough sockets, that is an error */ if (n < logical_skt) { - if (ORTE_BINDING_NOT_REQUIRED(jobdat->policy)) { - goto LAUNCH_PROCS; - } + ORTE_ODLS_IF_BIND_NOT_REQD("bind-to-socket"); orte_show_help("help-odls-default.txt", "odls-default:not-enough-sockets", true); ORTE_ODLS_ERROR_OUT(ORTE_ERR_FATAL); @@ -449,9 +454,7 @@ static int odls_default_fork_local_proc(orte_app_context_t* context, target_socket = opal_paffinity_base_get_physical_socket_id(logical_skt); if (ORTE_ERR_NOT_SUPPORTED == target_socket) { /* OS doesn't support providing topology information */ - if (ORTE_BINDING_NOT_REQUIRED(jobdat->policy)) { - goto LAUNCH_PROCS; - } + ORTE_ODLS_IF_BIND_NOT_REQD("bind-to-socket"); orte_show_help("help-odls-default.txt", "odls-default:topo-not-supported", true, orte_process_info.nodename, "bind-to-socket", "", @@ -475,9 +478,7 @@ static int odls_default_fork_local_proc(orte_app_context_t* context, target_socket = opal_paffinity_base_get_physical_socket_id(lrank % orte_odls_globals.num_sockets); if (ORTE_ERR_NOT_SUPPORTED == target_socket) { /* OS does not support providing topology information */ - if (ORTE_BINDING_NOT_REQUIRED(jobdat->policy)) { - goto LAUNCH_PROCS; - } + ORTE_ODLS_IF_BIND_NOT_REQD("bind-to-socket"); orte_show_help("help-odls-default.txt", "odls-default:topo-not-supported", true, orte_process_info.nodename, "bind-to-socket", "", @@ -511,9 +512,7 @@ static int odls_default_fork_local_proc(orte_app_context_t* context, } /* if we don't have enough processors, that is an error */ if (ncpu < logical_cpu) { - if (ORTE_BINDING_NOT_REQUIRED(jobdat->policy)) { - goto LAUNCH_PROCS; - } + ORTE_ODLS_IF_BIND_NOT_REQD("bind-to-socket"); orte_show_help("help-odls-default.txt", "odls-default:not-enough-processors", true); ORTE_ODLS_ERROR_OUT(ORTE_ERR_FATAL); @@ -534,9 +533,7 @@ static int 
odls_default_fork_local_proc(orte_app_context_t* context, target_socket = opal_paffinity_base_get_physical_socket_id(0); if (ORTE_ERR_NOT_SUPPORTED == target_socket) { /* OS doesn't support providing topology information */ - if (ORTE_BINDING_NOT_REQUIRED(jobdat->policy)) { - goto LAUNCH_PROCS; - } + ORTE_ODLS_IF_BIND_NOT_REQD("bind-to-socket"); orte_show_help("help-odls-default.txt", "odls-default:topo-not-supported", true, orte_process_info.nodename, "bind-to-socket", "", @@ -552,9 +549,7 @@ static int odls_default_fork_local_proc(orte_app_context_t* context, target_socket = opal_paffinity_base_get_physical_socket_id(logical_skt); if (ORTE_ERR_NOT_SUPPORTED == target_socket) { /* OS doesn't support providing topology information */ - if (ORTE_BINDING_NOT_REQUIRED(jobdat->policy)) { - goto LAUNCH_PROCS; - } + ORTE_ODLS_IF_BIND_NOT_REQD("bind-to-socket"); orte_show_help("help-odls-default.txt", "odls-default:topo-not-supported", true, orte_process_info.nodename, "bind-to-socket", "", @@ -573,18 +568,14 @@ static int odls_default_fork_local_proc(orte_app_context_t* context, /* get the physical core within this target socket */ phys_core = opal_paffinity_base_get_physical_core_id(target_socket, n); if (0 > phys_core) { - if (ORTE_BINDING_NOT_REQUIRED(jobdat->policy)) { - goto LAUNCH_PROCS; - } + ORTE_ODLS_IF_BIND_NOT_REQD("bind-to-socket"); orte_show_help("help-odls-default.txt", "odls-default:invalid-phys-cpu", true); ORTE_ODLS_ERROR_OUT(ORTE_ERR_FATAL); } /* map this to a physical cpu on this node */ if (ORTE_SUCCESS != opal_paffinity_base_get_map_to_processor_id(target_socket, phys_core, &phys_cpu)) { - if (ORTE_BINDING_NOT_REQUIRED(jobdat->policy)) { - goto LAUNCH_PROCS; - } + ORTE_ODLS_IF_BIND_NOT_REQD("bind-to-socket"); orte_show_help("help-odls-default.txt", "odls-default:invalid-phys-cpu", true); ORTE_ODLS_ERROR_OUT(ORTE_ERR_FATAL); @@ -616,9 +607,7 @@ static int odls_default_fork_local_proc(orte_app_context_t* context, ORTE_NAME_PRINT(child->name), 
target_socket, mask.bitmask[0]); } if (ORTE_SUCCESS != (rc = opal_paffinity_base_set(mask))) { - if (ORTE_BINDING_NOT_REQUIRED(jobdat->policy)) { - goto LAUNCH_PROCS; - } + ORTE_ODLS_IF_BIND_NOT_REQD("bind-to-socket"); orte_show_help("help-odls-default.txt", "odls-default:failed-set-paff", true); ORTE_ODLS_ERROR_OUT(rc); diff --git a/orte/runtime/orte_mca_params.c b/orte/runtime/orte_mca_params.c index eb1c488130..f2c21596d9 100644 --- a/orte/runtime/orte_mca_params.c +++ b/orte/runtime/orte_mca_params.c @@ -366,7 +366,7 @@ int orte_register_params(void) } else { binding = 0; params = opal_argv_split(strval, ':'); - if (2 == opal_argv_count(params)) { + if (1 < opal_argv_count(params)) { if (0 != strcasecmp(params[1], "if-avail")) { /* unknown option */ opal_output(0, "Unknown qualifier to orte_process_binding: %s", strval); @@ -374,11 +374,11 @@ int orte_register_params(void) } binding = ORTE_BIND_IF_SUPPORTED; } - if (0 == strcasecmp(strval, "socket")) { + if (0 == strcasecmp(params[0], "socket")) { ORTE_SET_BINDING_POLICY(ORTE_BIND_TO_SOCKET | binding); - } else if (0 == strcasecmp(strval, "board")) { + } else if (0 == strcasecmp(params[0], "board")) { ORTE_SET_BINDING_POLICY(ORTE_BIND_TO_BOARD | binding); - } else if (0 == strcasecmp(strval, "core")) { + } else if (0 == strcasecmp(params[0], "core")) { ORTE_SET_BINDING_POLICY(ORTE_BIND_TO_CORE | binding); } }