1
1

Final cleanup on new paffinity "if-avail" messages, plus fix one bug reported by Terry

This commit was SVN r21978.
Этот коммит содержится в:
Ralph Castain 2009-09-19 17:43:21 +00:00
родитель b18ca686ae
Коммит 7138fd131f
3 изменённых файлов: 40 добавлений и 40 удалений

Просмотреть файл

@ -107,3 +107,14 @@ use "bind to socket" and other related functionality.
Local host: %s Local host: %s
Action attempted: %s %s Action attempted: %s %s
Application name: %s Application name: %s
#
[odls-default:binding-not-avail]
A request to bind the processes if the operating system supports
such an operation was made, but the OS does not support this operation:
Action requested: %s
Because the request was made on an "if-available" basis, the job was
launched without taking the requested action. If this is not the desired
behavior, talk to your local system administrator to find out if your
system can support the requested action.

Просмотреть файл

@ -109,6 +109,19 @@ orte_odls_base_module_t orte_odls_default_module = {
exit(1); \ exit(1); \
} while(0); } while(0);
/*
 * Convenience macro for checking binding requirements.
 *
 * If ORTE_BINDING_NOT_REQUIRED(jobdat->policy) is true, control jumps to
 * the LAUNCH_PROCS label instead of falling through to the caller's error
 * handling.  Before jumping, when orte_odls_globals.report_bindings is set,
 * the "odls-default:binding-not-avail" topic of help-odls-default.txt is
 * shown, with (n) supplying the string for its "Action requested: %s" line.
 * If the policy check is false, the macro does nothing and execution
 * continues with the statement that follows it.
 *
 * The expansion site must have `jobdat` and a `LAUNCH_PROCS` label in scope.
 *
 * The topic name must match the section header in the help file exactly
 * ("odls-default:...").
 */
#define ORTE_ODLS_IF_BIND_NOT_REQD(n) \
do { \
if (ORTE_BINDING_NOT_REQUIRED(jobdat->policy)) { \
if (orte_odls_globals.report_bindings) { \
orte_show_help("help-odls-default.txt", \
"odls-default:binding-not-avail", \
true, (n)); \
} \
goto LAUNCH_PROCS; \
} \
} while(0);
static bool odls_default_child_died(pid_t pid, unsigned int timeout, int *exit_status) static bool odls_default_child_died(pid_t pid, unsigned int timeout, int *exit_status)
{ {
time_t end; time_t end;
@ -362,9 +375,7 @@ static int odls_default_fork_local_proc(orte_app_context_t* context,
} }
/* if we don't have enough processors, that is an error */ /* if we don't have enough processors, that is an error */
if (ncpu < logical_cpu) { if (ncpu < logical_cpu) {
if (ORTE_BINDING_NOT_REQUIRED(jobdat->policy)) { ORTE_ODLS_IF_BIND_NOT_REQD("bind-to-core");
goto LAUNCH_PROCS;
}
orte_show_help("help-odls-default.txt", orte_show_help("help-odls-default.txt",
"odls-default:not-enough-processors", true); "odls-default:not-enough-processors", true);
ORTE_ODLS_ERROR_OUT(ORTE_ERR_FATAL); ORTE_ODLS_ERROR_OUT(ORTE_ERR_FATAL);
@ -376,9 +387,7 @@ static int odls_default_fork_local_proc(orte_app_context_t* context,
*/ */
phys_cpu = opal_paffinity_base_get_physical_processor_id(logical_cpu); phys_cpu = opal_paffinity_base_get_physical_processor_id(logical_cpu);
if (0 > phys_cpu) { if (0 > phys_cpu) {
if (ORTE_BINDING_NOT_REQUIRED(jobdat->policy)) { ORTE_ODLS_IF_BIND_NOT_REQD("bind-to-core");
goto LAUNCH_PROCS;
}
orte_show_help("help-odls-default.txt", orte_show_help("help-odls-default.txt",
"odls-default:invalid-phys-cpu", true); "odls-default:invalid-phys-cpu", true);
ORTE_ODLS_ERROR_OUT(ORTE_ERR_FATAL); ORTE_ODLS_ERROR_OUT(ORTE_ERR_FATAL);
@ -393,9 +402,7 @@ static int odls_default_fork_local_proc(orte_app_context_t* context,
ORTE_NAME_PRINT(child->name), mask.bitmask[0]); ORTE_NAME_PRINT(child->name), mask.bitmask[0]);
} }
if (ORTE_SUCCESS != (rc = opal_paffinity_base_set(mask))) { if (ORTE_SUCCESS != (rc = opal_paffinity_base_set(mask))) {
if (ORTE_BINDING_NOT_REQUIRED(jobdat->policy)) { ORTE_ODLS_IF_BIND_NOT_REQD("bind-to-core");
goto LAUNCH_PROCS;
}
orte_show_help("help-odls-default.txt", orte_show_help("help-odls-default.txt",
"odls-default:failed-set-paff", true); "odls-default:failed-set-paff", true);
ORTE_ODLS_ERROR_OUT(rc); ORTE_ODLS_ERROR_OUT(rc);
@ -438,9 +445,7 @@ static int odls_default_fork_local_proc(orte_app_context_t* context,
} }
/* if we don't have enough sockets, that is an error */ /* if we don't have enough sockets, that is an error */
if (n < logical_skt) { if (n < logical_skt) {
if (ORTE_BINDING_NOT_REQUIRED(jobdat->policy)) { ORTE_ODLS_IF_BIND_NOT_REQD("bind-to-socket");
goto LAUNCH_PROCS;
}
orte_show_help("help-odls-default.txt", orte_show_help("help-odls-default.txt",
"odls-default:not-enough-sockets", true); "odls-default:not-enough-sockets", true);
ORTE_ODLS_ERROR_OUT(ORTE_ERR_FATAL); ORTE_ODLS_ERROR_OUT(ORTE_ERR_FATAL);
@ -449,9 +454,7 @@ static int odls_default_fork_local_proc(orte_app_context_t* context,
target_socket = opal_paffinity_base_get_physical_socket_id(logical_skt); target_socket = opal_paffinity_base_get_physical_socket_id(logical_skt);
if (ORTE_ERR_NOT_SUPPORTED == target_socket) { if (ORTE_ERR_NOT_SUPPORTED == target_socket) {
/* OS doesn't support providing topology information */ /* OS doesn't support providing topology information */
if (ORTE_BINDING_NOT_REQUIRED(jobdat->policy)) { ORTE_ODLS_IF_BIND_NOT_REQD("bind-to-socket");
goto LAUNCH_PROCS;
}
orte_show_help("help-odls-default.txt", orte_show_help("help-odls-default.txt",
"odls-default:topo-not-supported", "odls-default:topo-not-supported",
true, orte_process_info.nodename, "bind-to-socket", "", true, orte_process_info.nodename, "bind-to-socket", "",
@ -475,9 +478,7 @@ static int odls_default_fork_local_proc(orte_app_context_t* context,
target_socket = opal_paffinity_base_get_physical_socket_id(lrank % orte_odls_globals.num_sockets); target_socket = opal_paffinity_base_get_physical_socket_id(lrank % orte_odls_globals.num_sockets);
if (ORTE_ERR_NOT_SUPPORTED == target_socket) { if (ORTE_ERR_NOT_SUPPORTED == target_socket) {
/* OS does not support providing topology information */ /* OS does not support providing topology information */
if (ORTE_BINDING_NOT_REQUIRED(jobdat->policy)) { ORTE_ODLS_IF_BIND_NOT_REQD("bind-to-socket");
goto LAUNCH_PROCS;
}
orte_show_help("help-odls-default.txt", orte_show_help("help-odls-default.txt",
"odls-default:topo-not-supported", "odls-default:topo-not-supported",
true, orte_process_info.nodename, "bind-to-socket", "", true, orte_process_info.nodename, "bind-to-socket", "",
@ -511,9 +512,7 @@ static int odls_default_fork_local_proc(orte_app_context_t* context,
} }
/* if we don't have enough processors, that is an error */ /* if we don't have enough processors, that is an error */
if (ncpu < logical_cpu) { if (ncpu < logical_cpu) {
if (ORTE_BINDING_NOT_REQUIRED(jobdat->policy)) { ORTE_ODLS_IF_BIND_NOT_REQD("bind-to-socket");
goto LAUNCH_PROCS;
}
orte_show_help("help-odls-default.txt", orte_show_help("help-odls-default.txt",
"odls-default:not-enough-processors", true); "odls-default:not-enough-processors", true);
ORTE_ODLS_ERROR_OUT(ORTE_ERR_FATAL); ORTE_ODLS_ERROR_OUT(ORTE_ERR_FATAL);
@ -534,9 +533,7 @@ static int odls_default_fork_local_proc(orte_app_context_t* context,
target_socket = opal_paffinity_base_get_physical_socket_id(0); target_socket = opal_paffinity_base_get_physical_socket_id(0);
if (ORTE_ERR_NOT_SUPPORTED == target_socket) { if (ORTE_ERR_NOT_SUPPORTED == target_socket) {
/* OS doesn't support providing topology information */ /* OS doesn't support providing topology information */
if (ORTE_BINDING_NOT_REQUIRED(jobdat->policy)) { ORTE_ODLS_IF_BIND_NOT_REQD("bind-to-socket");
goto LAUNCH_PROCS;
}
orte_show_help("help-odls-default.txt", orte_show_help("help-odls-default.txt",
"odls-default:topo-not-supported", "odls-default:topo-not-supported",
true, orte_process_info.nodename, "bind-to-socket", "", true, orte_process_info.nodename, "bind-to-socket", "",
@ -552,9 +549,7 @@ static int odls_default_fork_local_proc(orte_app_context_t* context,
target_socket = opal_paffinity_base_get_physical_socket_id(logical_skt); target_socket = opal_paffinity_base_get_physical_socket_id(logical_skt);
if (ORTE_ERR_NOT_SUPPORTED == target_socket) { if (ORTE_ERR_NOT_SUPPORTED == target_socket) {
/* OS doesn't support providing topology information */ /* OS doesn't support providing topology information */
if (ORTE_BINDING_NOT_REQUIRED(jobdat->policy)) { ORTE_ODLS_IF_BIND_NOT_REQD("bind-to-socket");
goto LAUNCH_PROCS;
}
orte_show_help("help-odls-default.txt", orte_show_help("help-odls-default.txt",
"odls-default:topo-not-supported", "odls-default:topo-not-supported",
true, orte_process_info.nodename, "bind-to-socket", "", true, orte_process_info.nodename, "bind-to-socket", "",
@ -573,18 +568,14 @@ static int odls_default_fork_local_proc(orte_app_context_t* context,
/* get the physical core within this target socket */ /* get the physical core within this target socket */
phys_core = opal_paffinity_base_get_physical_core_id(target_socket, n); phys_core = opal_paffinity_base_get_physical_core_id(target_socket, n);
if (0 > phys_core) { if (0 > phys_core) {
if (ORTE_BINDING_NOT_REQUIRED(jobdat->policy)) { ORTE_ODLS_IF_BIND_NOT_REQD("bind-to-socket");
goto LAUNCH_PROCS;
}
orte_show_help("help-odls-default.txt", orte_show_help("help-odls-default.txt",
"odls-default:invalid-phys-cpu", true); "odls-default:invalid-phys-cpu", true);
ORTE_ODLS_ERROR_OUT(ORTE_ERR_FATAL); ORTE_ODLS_ERROR_OUT(ORTE_ERR_FATAL);
} }
/* map this to a physical cpu on this node */ /* map this to a physical cpu on this node */
if (ORTE_SUCCESS != opal_paffinity_base_get_map_to_processor_id(target_socket, phys_core, &phys_cpu)) { if (ORTE_SUCCESS != opal_paffinity_base_get_map_to_processor_id(target_socket, phys_core, &phys_cpu)) {
if (ORTE_BINDING_NOT_REQUIRED(jobdat->policy)) { ORTE_ODLS_IF_BIND_NOT_REQD("bind-to-socket");
goto LAUNCH_PROCS;
}
orte_show_help("help-odls-default.txt", orte_show_help("help-odls-default.txt",
"odls-default:invalid-phys-cpu", true); "odls-default:invalid-phys-cpu", true);
ORTE_ODLS_ERROR_OUT(ORTE_ERR_FATAL); ORTE_ODLS_ERROR_OUT(ORTE_ERR_FATAL);
@ -616,9 +607,7 @@ static int odls_default_fork_local_proc(orte_app_context_t* context,
ORTE_NAME_PRINT(child->name), target_socket, mask.bitmask[0]); ORTE_NAME_PRINT(child->name), target_socket, mask.bitmask[0]);
} }
if (ORTE_SUCCESS != (rc = opal_paffinity_base_set(mask))) { if (ORTE_SUCCESS != (rc = opal_paffinity_base_set(mask))) {
if (ORTE_BINDING_NOT_REQUIRED(jobdat->policy)) { ORTE_ODLS_IF_BIND_NOT_REQD("bind-to-socket");
goto LAUNCH_PROCS;
}
orte_show_help("help-odls-default.txt", orte_show_help("help-odls-default.txt",
"odls-default:failed-set-paff", true); "odls-default:failed-set-paff", true);
ORTE_ODLS_ERROR_OUT(rc); ORTE_ODLS_ERROR_OUT(rc);

Просмотреть файл

@ -366,7 +366,7 @@ int orte_register_params(void)
} else { } else {
binding = 0; binding = 0;
params = opal_argv_split(strval, ':'); params = opal_argv_split(strval, ':');
if (2 == opal_argv_count(params)) { if (1 < opal_argv_count(params)) {
if (0 != strcasecmp(params[1], "if-avail")) { if (0 != strcasecmp(params[1], "if-avail")) {
/* unknown option */ /* unknown option */
opal_output(0, "Unknown qualifier to orte_process_binding: %s", strval); opal_output(0, "Unknown qualifier to orte_process_binding: %s", strval);
@ -374,11 +374,11 @@ int orte_register_params(void)
} }
binding = ORTE_BIND_IF_SUPPORTED; binding = ORTE_BIND_IF_SUPPORTED;
} }
if (0 == strcasecmp(strval, "socket")) { if (0 == strcasecmp(params[0], "socket")) {
ORTE_SET_BINDING_POLICY(ORTE_BIND_TO_SOCKET | binding); ORTE_SET_BINDING_POLICY(ORTE_BIND_TO_SOCKET | binding);
} else if (0 == strcasecmp(strval, "board")) { } else if (0 == strcasecmp(params[0], "board")) {
ORTE_SET_BINDING_POLICY(ORTE_BIND_TO_BOARD | binding); ORTE_SET_BINDING_POLICY(ORTE_BIND_TO_BOARD | binding);
} else if (0 == strcasecmp(strval, "core")) { } else if (0 == strcasecmp(params[0], "core")) {
ORTE_SET_BINDING_POLICY(ORTE_BIND_TO_CORE | binding); ORTE_SET_BINDING_POLICY(ORTE_BIND_TO_CORE | binding);
} }
} }