Yet another paffinity cleanup...sigh.
1. Ensure that orte_rmaps_base_schedule_policy does not override command-line settings.
2. When you try to bind to more cores than we have, generate a not-enough-processors error message.
3. Allow the npersocket / -bind-to-core combination - because, yes, somebody actually wants to do it.
This commit was SVN r21996.
Этот коммит содержится в:
родитель
5406fdfb80
Коммит
dff0d01673
@ -342,8 +342,65 @@ static int odls_default_fork_local_proc(orte_app_context_t* context,
|
||||
"odls-default:invalid-node-rank", true);
|
||||
ORTE_ODLS_ERROR_OUT(ORTE_ERR_FATAL);
|
||||
}
|
||||
/* get the local rank */
|
||||
if (ORTE_LOCAL_RANK_INVALID == (lrank = orte_ess.get_local_rank(child->name))) {
|
||||
orte_show_help("help-odls-default.txt",
|
||||
"odls-default:invalid-local-rank", true);
|
||||
ORTE_ODLS_ERROR_OUT(ORTE_ERR_FATAL);
|
||||
}
|
||||
/* init the mask */
|
||||
OPAL_PAFFINITY_CPU_ZERO(mask);
|
||||
if (ORTE_MAPPING_BYSOCKET & jobdat->policy) {
|
||||
if (ORTE_MAPPING_NPERXXX & jobdat->policy) {
|
||||
/* we need to balance the children from this job across the available sockets */
|
||||
npersocket = jobdat->num_local_procs / orte_odls_globals.num_sockets;
|
||||
/* determine the socket to use based on those available */
|
||||
if (npersocket < 2) {
|
||||
/* if we only have 1/sock, or we have less procs than sockets,
|
||||
* then just put it on the lrank socket
|
||||
*/
|
||||
logical_skt = lrank;
|
||||
} else if (ORTE_MAPPING_BYSOCKET & jobdat->policy) {
|
||||
logical_skt = lrank % npersocket;
|
||||
} else {
|
||||
logical_skt = lrank / npersocket;
|
||||
}
|
||||
if (orte_odls_globals.bound) {
|
||||
/* if we are bound, use this as an index into our available sockets */
|
||||
for (target_socket=0; target_socket < opal_bitmap_size(&orte_odls_globals.sockets) && n < logical_skt; target_socket++) {
|
||||
if (opal_bitmap_is_set_bit(&orte_odls_globals.sockets, target_socket)) {
|
||||
n++;
|
||||
}
|
||||
}
|
||||
/* if we don't have enough sockets, that is an error */
|
||||
if (n < logical_skt) {
|
||||
ORTE_ODLS_IF_BIND_NOT_REQD("bind-to-core");
|
||||
orte_show_help("help-odls-default.txt",
|
||||
"odls-default:not-enough-resources", true,
|
||||
"sockets", orte_process_info.nodename,
|
||||
"bind-to-core", context->app);
|
||||
ORTE_ODLS_ERROR_OUT(ORTE_ERR_FATAL);
|
||||
}
|
||||
} else {
|
||||
target_socket = opal_paffinity_base_get_physical_socket_id(logical_skt);
|
||||
if (ORTE_ERR_NOT_SUPPORTED == target_socket) {
|
||||
/* OS doesn't support providing topology information */
|
||||
ORTE_ODLS_IF_BIND_NOT_REQD("bind-to-core");
|
||||
orte_show_help("help-odls-default.txt",
|
||||
"odls-default:topo-not-supported",
|
||||
true, orte_process_info.nodename, "bind-to-core", "",
|
||||
context->app);
|
||||
ORTE_ODLS_ERROR_OUT(ORTE_ERR_FATAL);
|
||||
}
|
||||
}
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_odls_globals.output,
|
||||
"%s odls:default:fork child %s local rank %d npersocket %d logical socket %d target socket %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(child->name), lrank,
|
||||
npersocket, logical_skt, target_socket));
|
||||
/* set the starting point */
|
||||
logical_cpu = (lrank % npersocket) * jobdat->cpus_per_rank;
|
||||
/* bind to this socket */
|
||||
goto bind_socket;
|
||||
} else if (ORTE_MAPPING_BYSOCKET & jobdat->policy) {
|
||||
/* this corresponds to a mapping policy where
|
||||
* local rank 0 goes on socket 0, and local
|
||||
* rank 1 goes on socket 1, etc. - round robin
|
||||
@ -352,11 +409,6 @@ static int odls_default_fork_local_proc(orte_app_context_t* context,
|
||||
* NOTE: we already know our number of sockets
|
||||
* from when we initialized
|
||||
*/
|
||||
if (ORTE_LOCAL_RANK_INVALID == (lrank = orte_ess.get_local_rank(child->name))) {
|
||||
orte_show_help("help-odls-default.txt",
|
||||
"odls-default:invalid-local-rank", true);
|
||||
ORTE_ODLS_ERROR_OUT(ORTE_ERR_FATAL);
|
||||
}
|
||||
target_socket = opal_paffinity_base_get_physical_socket_id(lrank % orte_odls_globals.num_sockets);
|
||||
if (ORTE_ERR_NOT_SUPPORTED == target_socket) {
|
||||
/* OS does not support providing topology information */
|
||||
@ -374,22 +426,25 @@ static int odls_default_fork_local_proc(orte_app_context_t* context,
|
||||
target_socket));
|
||||
/* my starting core within this socket has to be offset by cpus_per_rank */
|
||||
logical_cpu = (lrank / orte_odls_globals.num_sockets) * jobdat->cpus_per_rank;
|
||||
|
||||
bind_socket:
|
||||
/* cycle across the cpus_per_rank */
|
||||
for (n=0; n < jobdat->cpus_per_rank; n++) {
|
||||
ncpu = logical_cpu + (n * jobdat->stride);
|
||||
/* get the physical core within this target socket */
|
||||
phys_core = opal_paffinity_base_get_physical_core_id(target_socket, ncpu);
|
||||
phys_core = opal_paffinity_base_get_physical_core_id(target_socket, logical_cpu);
|
||||
if (0 > phys_core) {
|
||||
ORTE_ODLS_IF_BIND_NOT_REQD("bind-to-socket");
|
||||
ORTE_ODLS_IF_BIND_NOT_REQD("bind-to-core");
|
||||
orte_show_help("help-odls-default.txt",
|
||||
"odls-default:invalid-phys-cpu", true);
|
||||
ORTE_ODLS_ERROR_OUT(ORTE_ERR_FATAL);
|
||||
}
|
||||
/* map this to a physical cpu on this node */
|
||||
if (ORTE_SUCCESS != opal_paffinity_base_get_map_to_processor_id(target_socket, phys_core, &phys_cpu)) {
|
||||
ORTE_ODLS_IF_BIND_NOT_REQD("bind-to-socket");
|
||||
ORTE_ODLS_IF_BIND_NOT_REQD("bind-to-core");
|
||||
orte_show_help("help-odls-default.txt",
|
||||
"odls-default:invalid-phys-cpu", true);
|
||||
"odls-default:not-enough-resources", true,
|
||||
"processors", orte_process_info.nodename,
|
||||
"bind-to-core", context->app);
|
||||
ORTE_ODLS_ERROR_OUT(ORTE_ERR_FATAL);
|
||||
}
|
||||
/* are we bound? */
|
||||
@ -405,6 +460,13 @@ static int odls_default_fork_local_proc(orte_app_context_t* context,
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
target_socket, phys_core, phys_cpu));
|
||||
OPAL_PAFFINITY_CPU_SET(phys_cpu, mask);
|
||||
/* increment logical cpu */
|
||||
logical_cpu += jobdat->stride;
|
||||
}
|
||||
if (orte_odls_globals.report_bindings) {
|
||||
opal_output(0, "%s odls:default:fork binding child %s to socket %d cpus %04lx",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(child->name), target_socket, mask.bitmask[0]);
|
||||
}
|
||||
} else {
|
||||
/* my starting core has to be offset by cpus_per_rank */
|
||||
@ -444,14 +506,15 @@ static int odls_default_fork_local_proc(orte_app_context_t* context,
|
||||
ORTE_ODLS_ERROR_OUT(ORTE_ERR_FATAL);
|
||||
}
|
||||
}
|
||||
OPAL_PAFFINITY_CPU_SET(phys_cpu, mask);
|
||||
/* increment logical cpu */
|
||||
logical_cpu += jobdat->stride;
|
||||
}
|
||||
if (orte_odls_globals.report_bindings) {
|
||||
opal_output(0, "%s odls:default:fork binding child %s to cpus %04lx",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(child->name), mask.bitmask[0]);
|
||||
}
|
||||
OPAL_PAFFINITY_CPU_SET(phys_cpu, mask);
|
||||
logical_cpu += jobdat->stride;
|
||||
}
|
||||
if (orte_odls_globals.report_bindings) {
|
||||
opal_output(0, "%s odls:default:fork binding child %s to cpus %04lx",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(child->name), mask.bitmask[0]);
|
||||
}
|
||||
if (ORTE_SUCCESS != (rc = opal_paffinity_base_set(mask))) {
|
||||
ORTE_ODLS_IF_BIND_NOT_REQD("bind-to-core");
|
||||
|
@ -94,15 +94,21 @@ int orte_rmaps_base_open(void)
|
||||
|
||||
/* Are we scheduling by node or by slot? */
|
||||
param = mca_base_param_reg_string_name("rmaps", "base_schedule_policy",
|
||||
"Scheduling Policy for RMAPS. [slot (default) | socket | board | node]",
|
||||
"Scheduling Policy for RMAPS. [slot (alias:core) | socket | board | node]",
|
||||
false, false, "unspec", &policy);
|
||||
|
||||
if (0 == strcmp(policy, "socket")) {
|
||||
ORTE_SET_MAPPING_POLICY(ORTE_MAPPING_BYSOCKET);
|
||||
} else if (0 == strcmp(policy, "board")) {
|
||||
ORTE_SET_MAPPING_POLICY(ORTE_MAPPING_BYBOARD);
|
||||
} else if (0 == strcmp(policy, "node")) {
|
||||
ORTE_SET_MAPPING_POLICY(ORTE_MAPPING_BYNODE);
|
||||
/* if something is specified, do not override what may already
|
||||
* be present - could have been given on cmd line
|
||||
*/
|
||||
if (0 == strcasecmp(policy, "slot") ||
|
||||
0 == strcasecmp(policy, "core")) {
|
||||
ORTE_XSET_MAPPING_POLICY(ORTE_MAPPING_BYSLOT);
|
||||
} else if (0 == strcasecmp(policy, "socket")) {
|
||||
ORTE_XSET_MAPPING_POLICY(ORTE_MAPPING_BYSOCKET);
|
||||
} else if (0 == strcasecmp(policy, "board")) {
|
||||
ORTE_XSET_MAPPING_POLICY(ORTE_MAPPING_BYBOARD);
|
||||
} else if (0 == strcasecmp(policy, "node")) {
|
||||
ORTE_XSET_MAPPING_POLICY(ORTE_MAPPING_BYNODE);
|
||||
}
|
||||
/* if nothing was specified, leave it alone - we already set it
|
||||
* in orterun
|
||||
@ -138,8 +144,8 @@ int orte_rmaps_base_open(void)
|
||||
false, false, -1, &orte_rmaps_base.npersocket);
|
||||
if (0 < orte_rmaps_base.npersocket) {
|
||||
ORTE_ADD_MAPPING_POLICY(ORTE_MAPPING_NPERXXX);
|
||||
/* force bind to socket */
|
||||
ORTE_SET_BINDING_POLICY(ORTE_BIND_TO_SOCKET);
|
||||
/* force bind to socket if not overridden by user */
|
||||
ORTE_XSET_BINDING_POLICY(ORTE_BIND_TO_SOCKET);
|
||||
}
|
||||
|
||||
/* Do we want to loadbalance the job */
|
||||
|
@ -293,6 +293,16 @@ typedef uint16_t orte_mapping_policy_t;
|
||||
/* macro for setting the mapping policy: keeps only the 0x00ff bits of the default policy and ORs in (pol) */
|
||||
#define ORTE_SET_MAPPING_POLICY(pol) \
|
||||
orte_default_mapping_policy = (orte_default_mapping_policy & 0x00ff) | (pol);
|
||||
/* Set the mapping policy to (pol) only if no other policy has been set.
 *
 * Takes the upper byte (0xff00) of orte_default_mapping_policy and masks
 * out the bits of (pol). If nothing else remains, the policy is applied
 * via ORTE_SET_MAPPING_POLICY(pol); otherwise the current value is left
 * untouched so an earlier setting is not overridden.
 *
 * There is intentionally no semicolon after while (0): the caller's own
 * ';' terminates the statement, which keeps the macro usable as the body
 * of an unbraced if/else.
 */
#define ORTE_XSET_MAPPING_POLICY(pol)                                          \
    do {                                                                       \
        orte_mapping_policy_t orte_xset_other_bits_;                           \
        orte_xset_other_bits_ =                                                \
            (orte_default_mapping_policy & 0xff00) & ~(pol);                   \
        if (0 == orte_xset_other_bits_) {                                      \
            ORTE_SET_MAPPING_POLICY((pol));                                    \
        }                                                                      \
    } while (0)
|
||||
/* macro to add another mapping policy */
|
||||
#define ORTE_ADD_MAPPING_POLICY(pol) \
|
||||
orte_default_mapping_policy |= (pol);
|
||||
|
||||
|
@ -263,6 +263,9 @@ static opal_cmd_line_init_t cmd_line_init[] = {
|
||||
{ NULL, NULL, NULL, '\0', "byslot", "byslot", 0,
|
||||
&orterun_globals.by_slot, OPAL_CMD_LINE_TYPE_BOOL,
|
||||
"Whether to assign processes round-robin by slot (the default)" },
|
||||
{ NULL, NULL, NULL, '\0', "bycore", "bycore", 0,
|
||||
&orterun_globals.by_slot, OPAL_CMD_LINE_TYPE_BOOL,
|
||||
"Alias for byslot" },
|
||||
{ NULL, NULL, NULL, '\0', "bysocket", "bysocket", 0,
|
||||
&orterun_globals.by_socket, OPAL_CMD_LINE_TYPE_BOOL,
|
||||
"Whether to assign processes round-robin by socket" },
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user