Final cleanup on new paffinity "if-avail" messages, plus fix one bug reported by Terry
This commit was SVN r21978.
Этот коммит содержится в:
родитель
b18ca686ae
Коммит
7138fd131f
@ -107,3 +107,14 @@ use "bind to socket" and other related functionality.
|
|||||||
Local host: %s
|
Local host: %s
|
||||||
Action attempted: %s %s
|
Action attempted: %s %s
|
||||||
Application name: %s
|
Application name: %s
|
||||||
|
#
|
||||||
|
[odls-default:binding-not-avail]
|
||||||
|
A request to bind the processes if the operating system supports
|
||||||
|
such an operation was made, but the OS does not support this operation:
|
||||||
|
|
||||||
|
Action requested: %s
|
||||||
|
|
||||||
|
Because the request was made on an "if-available" basis, the job was
|
||||||
|
launched without taking the requested action. If this is not the desired
|
||||||
|
behavior, talk to your local system administrator to find out if your
|
||||||
|
system can support the requested action.
|
||||||
|
@ -109,6 +109,19 @@ orte_odls_base_module_t orte_odls_default_module = {
|
|||||||
exit(1); \
|
exit(1); \
|
||||||
} while(0);
|
} while(0);
|
||||||
|
|
||||||
|
/*
 * Convenience macro for checking binding requirements.
 *
 * If the job's binding policy does not require binding (as decided by
 * ORTE_BINDING_NOT_REQUIRED applied to jobdat->policy), optionally emit the
 * "binding-not-avail" help message and jump to the LAUNCH_PROCS label so the
 * process is launched without the requested binding.  If binding is
 * required, the macro does nothing and control falls through to the
 * caller's own error handling.
 *
 *   n - string naming the requested action (e.g. "bind-to-core"); it is
 *       passed to the help message as its single %s argument.
 *
 * The expansion relies on names from the calling scope: a `jobdat` pointer
 * with a `policy` member, the `orte_odls_globals.report_bindings` flag, and
 * a `LAUNCH_PROCS` label in the enclosing function.
 *
 * The help topic must match the "[odls-default:binding-not-avail]" entry in
 * help-odls-default.txt exactly, otherwise the lookup cannot find the text.
 */
#define ORTE_ODLS_IF_BIND_NOT_REQD(n)                               \
    do {                                                            \
        if (ORTE_BINDING_NOT_REQUIRED(jobdat->policy)) {            \
            if (orte_odls_globals.report_bindings) {                \
                orte_show_help("help-odls-default.txt",             \
                               "odls-default:binding-not-avail",    \
                               true, (n));                          \
            }                                                       \
            goto LAUNCH_PROCS;                                      \
        }                                                           \
    } while(0);
|
||||||
|
|
||||||
static bool odls_default_child_died(pid_t pid, unsigned int timeout, int *exit_status)
|
static bool odls_default_child_died(pid_t pid, unsigned int timeout, int *exit_status)
|
||||||
{
|
{
|
||||||
time_t end;
|
time_t end;
|
||||||
@ -362,9 +375,7 @@ static int odls_default_fork_local_proc(orte_app_context_t* context,
|
|||||||
}
|
}
|
||||||
/* if we don't have enough processors, that is an error */
|
/* if we don't have enough processors, that is an error */
|
||||||
if (ncpu < logical_cpu) {
|
if (ncpu < logical_cpu) {
|
||||||
if (ORTE_BINDING_NOT_REQUIRED(jobdat->policy)) {
|
ORTE_ODLS_IF_BIND_NOT_REQD("bind-to-core");
|
||||||
goto LAUNCH_PROCS;
|
|
||||||
}
|
|
||||||
orte_show_help("help-odls-default.txt",
|
orte_show_help("help-odls-default.txt",
|
||||||
"odls-default:not-enough-processors", true);
|
"odls-default:not-enough-processors", true);
|
||||||
ORTE_ODLS_ERROR_OUT(ORTE_ERR_FATAL);
|
ORTE_ODLS_ERROR_OUT(ORTE_ERR_FATAL);
|
||||||
@ -376,9 +387,7 @@ static int odls_default_fork_local_proc(orte_app_context_t* context,
|
|||||||
*/
|
*/
|
||||||
phys_cpu = opal_paffinity_base_get_physical_processor_id(logical_cpu);
|
phys_cpu = opal_paffinity_base_get_physical_processor_id(logical_cpu);
|
||||||
if (0 > phys_cpu) {
|
if (0 > phys_cpu) {
|
||||||
if (ORTE_BINDING_NOT_REQUIRED(jobdat->policy)) {
|
ORTE_ODLS_IF_BIND_NOT_REQD("bind-to-core");
|
||||||
goto LAUNCH_PROCS;
|
|
||||||
}
|
|
||||||
orte_show_help("help-odls-default.txt",
|
orte_show_help("help-odls-default.txt",
|
||||||
"odls-default:invalid-phys-cpu", true);
|
"odls-default:invalid-phys-cpu", true);
|
||||||
ORTE_ODLS_ERROR_OUT(ORTE_ERR_FATAL);
|
ORTE_ODLS_ERROR_OUT(ORTE_ERR_FATAL);
|
||||||
@ -393,9 +402,7 @@ static int odls_default_fork_local_proc(orte_app_context_t* context,
|
|||||||
ORTE_NAME_PRINT(child->name), mask.bitmask[0]);
|
ORTE_NAME_PRINT(child->name), mask.bitmask[0]);
|
||||||
}
|
}
|
||||||
if (ORTE_SUCCESS != (rc = opal_paffinity_base_set(mask))) {
|
if (ORTE_SUCCESS != (rc = opal_paffinity_base_set(mask))) {
|
||||||
if (ORTE_BINDING_NOT_REQUIRED(jobdat->policy)) {
|
ORTE_ODLS_IF_BIND_NOT_REQD("bind-to-core");
|
||||||
goto LAUNCH_PROCS;
|
|
||||||
}
|
|
||||||
orte_show_help("help-odls-default.txt",
|
orte_show_help("help-odls-default.txt",
|
||||||
"odls-default:failed-set-paff", true);
|
"odls-default:failed-set-paff", true);
|
||||||
ORTE_ODLS_ERROR_OUT(rc);
|
ORTE_ODLS_ERROR_OUT(rc);
|
||||||
@ -438,9 +445,7 @@ static int odls_default_fork_local_proc(orte_app_context_t* context,
|
|||||||
}
|
}
|
||||||
/* if we don't have enough sockets, that is an error */
|
/* if we don't have enough sockets, that is an error */
|
||||||
if (n < logical_skt) {
|
if (n < logical_skt) {
|
||||||
if (ORTE_BINDING_NOT_REQUIRED(jobdat->policy)) {
|
ORTE_ODLS_IF_BIND_NOT_REQD("bind-to-socket");
|
||||||
goto LAUNCH_PROCS;
|
|
||||||
}
|
|
||||||
orte_show_help("help-odls-default.txt",
|
orte_show_help("help-odls-default.txt",
|
||||||
"odls-default:not-enough-sockets", true);
|
"odls-default:not-enough-sockets", true);
|
||||||
ORTE_ODLS_ERROR_OUT(ORTE_ERR_FATAL);
|
ORTE_ODLS_ERROR_OUT(ORTE_ERR_FATAL);
|
||||||
@ -449,9 +454,7 @@ static int odls_default_fork_local_proc(orte_app_context_t* context,
|
|||||||
target_socket = opal_paffinity_base_get_physical_socket_id(logical_skt);
|
target_socket = opal_paffinity_base_get_physical_socket_id(logical_skt);
|
||||||
if (ORTE_ERR_NOT_SUPPORTED == target_socket) {
|
if (ORTE_ERR_NOT_SUPPORTED == target_socket) {
|
||||||
/* OS doesn't support providing topology information */
|
/* OS doesn't support providing topology information */
|
||||||
if (ORTE_BINDING_NOT_REQUIRED(jobdat->policy)) {
|
ORTE_ODLS_IF_BIND_NOT_REQD("bind-to-socket");
|
||||||
goto LAUNCH_PROCS;
|
|
||||||
}
|
|
||||||
orte_show_help("help-odls-default.txt",
|
orte_show_help("help-odls-default.txt",
|
||||||
"odls-default:topo-not-supported",
|
"odls-default:topo-not-supported",
|
||||||
true, orte_process_info.nodename, "bind-to-socket", "",
|
true, orte_process_info.nodename, "bind-to-socket", "",
|
||||||
@ -475,9 +478,7 @@ static int odls_default_fork_local_proc(orte_app_context_t* context,
|
|||||||
target_socket = opal_paffinity_base_get_physical_socket_id(lrank % orte_odls_globals.num_sockets);
|
target_socket = opal_paffinity_base_get_physical_socket_id(lrank % orte_odls_globals.num_sockets);
|
||||||
if (ORTE_ERR_NOT_SUPPORTED == target_socket) {
|
if (ORTE_ERR_NOT_SUPPORTED == target_socket) {
|
||||||
/* OS does not support providing topology information */
|
/* OS does not support providing topology information */
|
||||||
if (ORTE_BINDING_NOT_REQUIRED(jobdat->policy)) {
|
ORTE_ODLS_IF_BIND_NOT_REQD("bind-to-socket");
|
||||||
goto LAUNCH_PROCS;
|
|
||||||
}
|
|
||||||
orte_show_help("help-odls-default.txt",
|
orte_show_help("help-odls-default.txt",
|
||||||
"odls-default:topo-not-supported",
|
"odls-default:topo-not-supported",
|
||||||
true, orte_process_info.nodename, "bind-to-socket", "",
|
true, orte_process_info.nodename, "bind-to-socket", "",
|
||||||
@ -511,9 +512,7 @@ static int odls_default_fork_local_proc(orte_app_context_t* context,
|
|||||||
}
|
}
|
||||||
/* if we don't have enough processors, that is an error */
|
/* if we don't have enough processors, that is an error */
|
||||||
if (ncpu < logical_cpu) {
|
if (ncpu < logical_cpu) {
|
||||||
if (ORTE_BINDING_NOT_REQUIRED(jobdat->policy)) {
|
ORTE_ODLS_IF_BIND_NOT_REQD("bind-to-socket");
|
||||||
goto LAUNCH_PROCS;
|
|
||||||
}
|
|
||||||
orte_show_help("help-odls-default.txt",
|
orte_show_help("help-odls-default.txt",
|
||||||
"odls-default:not-enough-processors", true);
|
"odls-default:not-enough-processors", true);
|
||||||
ORTE_ODLS_ERROR_OUT(ORTE_ERR_FATAL);
|
ORTE_ODLS_ERROR_OUT(ORTE_ERR_FATAL);
|
||||||
@ -534,9 +533,7 @@ static int odls_default_fork_local_proc(orte_app_context_t* context,
|
|||||||
target_socket = opal_paffinity_base_get_physical_socket_id(0);
|
target_socket = opal_paffinity_base_get_physical_socket_id(0);
|
||||||
if (ORTE_ERR_NOT_SUPPORTED == target_socket) {
|
if (ORTE_ERR_NOT_SUPPORTED == target_socket) {
|
||||||
/* OS doesn't support providing topology information */
|
/* OS doesn't support providing topology information */
|
||||||
if (ORTE_BINDING_NOT_REQUIRED(jobdat->policy)) {
|
ORTE_ODLS_IF_BIND_NOT_REQD("bind-to-socket");
|
||||||
goto LAUNCH_PROCS;
|
|
||||||
}
|
|
||||||
orte_show_help("help-odls-default.txt",
|
orte_show_help("help-odls-default.txt",
|
||||||
"odls-default:topo-not-supported",
|
"odls-default:topo-not-supported",
|
||||||
true, orte_process_info.nodename, "bind-to-socket", "",
|
true, orte_process_info.nodename, "bind-to-socket", "",
|
||||||
@ -552,9 +549,7 @@ static int odls_default_fork_local_proc(orte_app_context_t* context,
|
|||||||
target_socket = opal_paffinity_base_get_physical_socket_id(logical_skt);
|
target_socket = opal_paffinity_base_get_physical_socket_id(logical_skt);
|
||||||
if (ORTE_ERR_NOT_SUPPORTED == target_socket) {
|
if (ORTE_ERR_NOT_SUPPORTED == target_socket) {
|
||||||
/* OS doesn't support providing topology information */
|
/* OS doesn't support providing topology information */
|
||||||
if (ORTE_BINDING_NOT_REQUIRED(jobdat->policy)) {
|
ORTE_ODLS_IF_BIND_NOT_REQD("bind-to-socket");
|
||||||
goto LAUNCH_PROCS;
|
|
||||||
}
|
|
||||||
orte_show_help("help-odls-default.txt",
|
orte_show_help("help-odls-default.txt",
|
||||||
"odls-default:topo-not-supported",
|
"odls-default:topo-not-supported",
|
||||||
true, orte_process_info.nodename, "bind-to-socket", "",
|
true, orte_process_info.nodename, "bind-to-socket", "",
|
||||||
@ -573,18 +568,14 @@ static int odls_default_fork_local_proc(orte_app_context_t* context,
|
|||||||
/* get the physical core within this target socket */
|
/* get the physical core within this target socket */
|
||||||
phys_core = opal_paffinity_base_get_physical_core_id(target_socket, n);
|
phys_core = opal_paffinity_base_get_physical_core_id(target_socket, n);
|
||||||
if (0 > phys_core) {
|
if (0 > phys_core) {
|
||||||
if (ORTE_BINDING_NOT_REQUIRED(jobdat->policy)) {
|
ORTE_ODLS_IF_BIND_NOT_REQD("bind-to-socket");
|
||||||
goto LAUNCH_PROCS;
|
|
||||||
}
|
|
||||||
orte_show_help("help-odls-default.txt",
|
orte_show_help("help-odls-default.txt",
|
||||||
"odls-default:invalid-phys-cpu", true);
|
"odls-default:invalid-phys-cpu", true);
|
||||||
ORTE_ODLS_ERROR_OUT(ORTE_ERR_FATAL);
|
ORTE_ODLS_ERROR_OUT(ORTE_ERR_FATAL);
|
||||||
}
|
}
|
||||||
/* map this to a physical cpu on this node */
|
/* map this to a physical cpu on this node */
|
||||||
if (ORTE_SUCCESS != opal_paffinity_base_get_map_to_processor_id(target_socket, phys_core, &phys_cpu)) {
|
if (ORTE_SUCCESS != opal_paffinity_base_get_map_to_processor_id(target_socket, phys_core, &phys_cpu)) {
|
||||||
if (ORTE_BINDING_NOT_REQUIRED(jobdat->policy)) {
|
ORTE_ODLS_IF_BIND_NOT_REQD("bind-to-socket");
|
||||||
goto LAUNCH_PROCS;
|
|
||||||
}
|
|
||||||
orte_show_help("help-odls-default.txt",
|
orte_show_help("help-odls-default.txt",
|
||||||
"odls-default:invalid-phys-cpu", true);
|
"odls-default:invalid-phys-cpu", true);
|
||||||
ORTE_ODLS_ERROR_OUT(ORTE_ERR_FATAL);
|
ORTE_ODLS_ERROR_OUT(ORTE_ERR_FATAL);
|
||||||
@ -616,9 +607,7 @@ static int odls_default_fork_local_proc(orte_app_context_t* context,
|
|||||||
ORTE_NAME_PRINT(child->name), target_socket, mask.bitmask[0]);
|
ORTE_NAME_PRINT(child->name), target_socket, mask.bitmask[0]);
|
||||||
}
|
}
|
||||||
if (ORTE_SUCCESS != (rc = opal_paffinity_base_set(mask))) {
|
if (ORTE_SUCCESS != (rc = opal_paffinity_base_set(mask))) {
|
||||||
if (ORTE_BINDING_NOT_REQUIRED(jobdat->policy)) {
|
ORTE_ODLS_IF_BIND_NOT_REQD("bind-to-socket");
|
||||||
goto LAUNCH_PROCS;
|
|
||||||
}
|
|
||||||
orte_show_help("help-odls-default.txt",
|
orte_show_help("help-odls-default.txt",
|
||||||
"odls-default:failed-set-paff", true);
|
"odls-default:failed-set-paff", true);
|
||||||
ORTE_ODLS_ERROR_OUT(rc);
|
ORTE_ODLS_ERROR_OUT(rc);
|
||||||
|
@ -366,7 +366,7 @@ int orte_register_params(void)
|
|||||||
} else {
|
} else {
|
||||||
binding = 0;
|
binding = 0;
|
||||||
params = opal_argv_split(strval, ':');
|
params = opal_argv_split(strval, ':');
|
||||||
if (2 == opal_argv_count(params)) {
|
if (1 < opal_argv_count(params)) {
|
||||||
if (0 != strcasecmp(params[1], "if-avail")) {
|
if (0 != strcasecmp(params[1], "if-avail")) {
|
||||||
/* unknown option */
|
/* unknown option */
|
||||||
opal_output(0, "Unknown qualifier to orte_process_binding: %s", strval);
|
opal_output(0, "Unknown qualifier to orte_process_binding: %s", strval);
|
||||||
@ -374,11 +374,11 @@ int orte_register_params(void)
|
|||||||
}
|
}
|
||||||
binding = ORTE_BIND_IF_SUPPORTED;
|
binding = ORTE_BIND_IF_SUPPORTED;
|
||||||
}
|
}
|
||||||
if (0 == strcasecmp(strval, "socket")) {
|
if (0 == strcasecmp(params[0], "socket")) {
|
||||||
ORTE_SET_BINDING_POLICY(ORTE_BIND_TO_SOCKET | binding);
|
ORTE_SET_BINDING_POLICY(ORTE_BIND_TO_SOCKET | binding);
|
||||||
} else if (0 == strcasecmp(strval, "board")) {
|
} else if (0 == strcasecmp(params[0], "board")) {
|
||||||
ORTE_SET_BINDING_POLICY(ORTE_BIND_TO_BOARD | binding);
|
ORTE_SET_BINDING_POLICY(ORTE_BIND_TO_BOARD | binding);
|
||||||
} else if (0 == strcasecmp(strval, "core")) {
|
} else if (0 == strcasecmp(params[0], "core")) {
|
||||||
ORTE_SET_BINDING_POLICY(ORTE_BIND_TO_CORE | binding);
|
ORTE_SET_BINDING_POLICY(ORTE_BIND_TO_CORE | binding);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user