Some (hopefully final!) adjustments and corrections to the paffinity support:
1. Default -npersocket to force -bind-to-socket.
2. If we cannot get a value for cores/socket, try using the number of logical CPUs; if that also fails, default to 1 core.
3. Add the missing error message for not-enough-processors.
4. Since we no longer loop through orte_register_params twice, put the auto-detection of topology info in the rte_init for hnp and std_orted.
5. Fix the bind-to-core, bysocket combination.

This commit was SVN r21992.
Этот коммит содержится в:
родитель
b91e7ba91f
Коммит
8da3aa8d5c
@ -35,6 +35,7 @@
|
||||
#include "opal/runtime/opal.h"
|
||||
#include "opal/runtime/opal_cr.h"
|
||||
#include "opal/mca/pstat/base/base.h"
|
||||
#include "opal/mca/paffinity/base/base.h"
|
||||
|
||||
#include "orte/mca/rml/base/base.h"
|
||||
#include "orte/mca/routed/base/base.h"
|
||||
@ -77,11 +78,33 @@ int orte_ess_base_orted_setup(char **hosts)
|
||||
int ret;
|
||||
char *error = NULL;
|
||||
char *plm_to_use;
|
||||
int value;
|
||||
|
||||
/* initialize the global list of local children and job data */
|
||||
OBJ_CONSTRUCT(&orte_local_children, opal_list_t);
|
||||
OBJ_CONSTRUCT(&orte_local_jobdata, opal_list_t);
|
||||
|
||||
/* determine the topology info */
|
||||
if (0 == orte_default_num_sockets_per_board) {
|
||||
/* we weren't given a number, so try to determine it */
|
||||
if (OPAL_SUCCESS != opal_paffinity_base_get_socket_info(&value)) {
|
||||
/* can't get any info - default to 1 */
|
||||
value = 1;
|
||||
}
|
||||
orte_default_num_sockets_per_board = (uint8_t)value;
|
||||
}
|
||||
if (0 == orte_default_num_cores_per_socket) {
|
||||
/* we weren't given a number, so try to determine it */
|
||||
if (OPAL_SUCCESS != opal_paffinity_base_get_core_info(0, &value)) {
|
||||
/* don't have topo info - can we at least get #processors? */
|
||||
if (OPAL_SUCCESS != opal_paffinity_base_get_processor_info(&value)) {
|
||||
/* can't get any info - default to 1 */
|
||||
value = 1;
|
||||
}
|
||||
}
|
||||
orte_default_num_cores_per_socket = (uint8_t)value;
|
||||
}
|
||||
|
||||
/* open and setup the opal_pstat framework so we can provide
|
||||
* process stats if requested
|
||||
*/
|
||||
|
@ -38,7 +38,7 @@
|
||||
#include "opal/util/malloc.h"
|
||||
#include "opal/util/basename.h"
|
||||
#include "opal/mca/pstat/base/base.h"
|
||||
#include "opal/mca/paffinity/paffinity.h"
|
||||
#include "opal/mca/paffinity/base/base.h"
|
||||
|
||||
#include "orte/util/show_help.h"
|
||||
#include "orte/mca/rml/base/base.h"
|
||||
@ -116,6 +116,7 @@ static int rte_init(void)
|
||||
orte_job_t *jdata;
|
||||
orte_node_t *node;
|
||||
orte_proc_t *proc;
|
||||
int value;
|
||||
|
||||
/* initialize the global list of local children and job data */
|
||||
OBJ_CONSTRUCT(&orte_local_children, opal_list_t);
|
||||
@ -127,6 +128,27 @@ static int rte_init(void)
|
||||
goto error;
|
||||
}
|
||||
|
||||
/* determine the topology info */
|
||||
if (0 == orte_default_num_sockets_per_board) {
|
||||
/* we weren't given a number, so try to determine it */
|
||||
if (OPAL_SUCCESS != opal_paffinity_base_get_socket_info(&value)) {
|
||||
/* can't get any info - default to 1 */
|
||||
value = 1;
|
||||
}
|
||||
orte_default_num_sockets_per_board = (uint8_t)value;
|
||||
}
|
||||
if (0 == orte_default_num_cores_per_socket) {
|
||||
/* we weren't given a number, so try to determine it */
|
||||
if (OPAL_SUCCESS != (ret = opal_paffinity_base_get_core_info(0, &value))) {
|
||||
/* don't have topo info - can we at least get #processors? */
|
||||
if (OPAL_SUCCESS != opal_paffinity_base_get_processor_info(&value)) {
|
||||
/* can't get any info - default to 1 */
|
||||
value = 1;
|
||||
}
|
||||
}
|
||||
orte_default_num_cores_per_socket = (uint8_t)value;
|
||||
}
|
||||
|
||||
/* if we are using xml for output, put an mpirun start tag */
|
||||
if (orte_xml_output) {
|
||||
fprintf(orte_xml_fp, "<mpirun>\n");
|
||||
|
@ -120,3 +120,13 @@ Because the request was made on an "if-available" basis, the job was
|
||||
launched without taking the requested action. If this is not the desired
|
||||
behavior, talk to your local system administrator to find out if your
|
||||
system can support the requested action.
|
||||
#
|
||||
[odls-default:not-enough-resources]
|
||||
Not enough %s were found on the local host to meet the requested
|
||||
binding action:
|
||||
|
||||
Local host: %s
|
||||
Action requested: %s
|
||||
Application name: %s
|
||||
|
||||
Please revise the request and try again.
|
||||
|
@ -343,56 +343,107 @@ static int odls_default_fork_local_proc(orte_app_context_t* context,
|
||||
ORTE_ODLS_ERROR_OUT(ORTE_ERR_FATAL);
|
||||
}
|
||||
OPAL_PAFFINITY_CPU_ZERO(mask);
|
||||
if (ORTE_MAPPING_NPERXXX & jobdat->policy) {
|
||||
/* if npersocket was set, then we divide the number of cores
|
||||
* per socket by the #localprocs/#sockets to determine how many cores
|
||||
* each rank gets
|
||||
if (ORTE_MAPPING_BYSOCKET & jobdat->policy) {
|
||||
/* this corresponds to a mapping policy where
|
||||
* local rank 0 goes on socket 0, and local
|
||||
* rank 1 goes on socket 1, etc. - round robin
|
||||
* until all ranks are mapped
|
||||
*
|
||||
* NOTE: we already know our number of sockets
|
||||
* from when we initialized
|
||||
*/
|
||||
npersocket = jobdat->num_local_procs / orte_odls_globals.num_sockets;
|
||||
/* compute the #cores/process */
|
||||
jobdat->cpus_per_rank = orte_default_num_cores_per_socket / npersocket;
|
||||
/* figure out which logical cpu this node rank should start on as we
|
||||
* must ensure it starts on the right socket
|
||||
*/
|
||||
logical_cpu = (nrank / npersocket) * orte_default_num_cores_per_socket;
|
||||
/* now add an offset within the socket */
|
||||
logical_cpu += (nrank % npersocket) * jobdat->cpus_per_rank;
|
||||
} else {
|
||||
/* my starting core has to be offset by cpus_per_rank */
|
||||
logical_cpu = nrank * jobdat->cpus_per_rank;
|
||||
}
|
||||
for (n=0; n < jobdat->cpus_per_rank; n++) {
|
||||
/* are we bound? */
|
||||
if (orte_odls_globals.bound) {
|
||||
/* if we are bound, then use the logical_cpu as an index
|
||||
* against our available cores
|
||||
*/
|
||||
ncpu = 0;
|
||||
for (i=0; i < orte_odls_globals.num_processors && ncpu <= logical_cpu; i++) {
|
||||
if (OPAL_PAFFINITY_CPU_ISSET(i, orte_odls_globals.my_cores)) {
|
||||
ncpu++;
|
||||
phys_cpu = i;
|
||||
}
|
||||
}
|
||||
/* if we don't have enough processors, that is an error */
|
||||
if (ncpu < logical_cpu) {
|
||||
ORTE_ODLS_IF_BIND_NOT_REQD("bind-to-core");
|
||||
orte_show_help("help-odls-default.txt",
|
||||
"odls-default:not-enough-processors", true);
|
||||
ORTE_ODLS_ERROR_OUT(ORTE_ERR_FATAL);
|
||||
}
|
||||
} else {
|
||||
/* if we are not bound, then all processors are available
|
||||
* to us, so index into the node's array to get the
|
||||
* physical cpu
|
||||
*/
|
||||
phys_cpu = opal_paffinity_base_get_physical_processor_id(logical_cpu);
|
||||
if (0 > phys_cpu) {
|
||||
ORTE_ODLS_IF_BIND_NOT_REQD("bind-to-core");
|
||||
if (ORTE_LOCAL_RANK_INVALID == (lrank = orte_ess.get_local_rank(child->name))) {
|
||||
orte_show_help("help-odls-default.txt",
|
||||
"odls-default:invalid-local-rank", true);
|
||||
ORTE_ODLS_ERROR_OUT(ORTE_ERR_FATAL);
|
||||
}
|
||||
target_socket = opal_paffinity_base_get_physical_socket_id(lrank % orte_odls_globals.num_sockets);
|
||||
if (ORTE_ERR_NOT_SUPPORTED == target_socket) {
|
||||
/* OS does not support providing topology information */
|
||||
ORTE_ODLS_IF_BIND_NOT_REQD("bind-to-core");
|
||||
orte_show_help("help-odls-default.txt",
|
||||
"odls-default:topo-not-supported",
|
||||
true, orte_process_info.nodename, "bind-to-core", "",
|
||||
context->app);
|
||||
ORTE_ODLS_ERROR_OUT(ORTE_ERR_FATAL);
|
||||
}
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_odls_globals.output,
|
||||
"bysocket lrank %d numsocks %d logical socket %d target socket %d", (int)lrank,
|
||||
(int)orte_odls_globals.num_sockets,
|
||||
(int)(lrank % orte_odls_globals.num_sockets),
|
||||
target_socket));
|
||||
/* my starting core within this socket has to be offset by cpus_per_rank */
|
||||
logical_cpu = (lrank / orte_odls_globals.num_sockets) * jobdat->cpus_per_rank;
|
||||
/* cycle across the cpus_per_rank */
|
||||
for (n=0; n < jobdat->cpus_per_rank; n++) {
|
||||
ncpu = logical_cpu + (n * jobdat->stride);
|
||||
/* get the physical core within this target socket */
|
||||
phys_core = opal_paffinity_base_get_physical_core_id(target_socket, ncpu);
|
||||
if (0 > phys_core) {
|
||||
ORTE_ODLS_IF_BIND_NOT_REQD("bind-to-socket");
|
||||
orte_show_help("help-odls-default.txt",
|
||||
"odls-default:invalid-phys-cpu", true);
|
||||
ORTE_ODLS_ERROR_OUT(ORTE_ERR_FATAL);
|
||||
}
|
||||
/* map this to a physical cpu on this node */
|
||||
if (ORTE_SUCCESS != opal_paffinity_base_get_map_to_processor_id(target_socket, phys_core, &phys_cpu)) {
|
||||
ORTE_ODLS_IF_BIND_NOT_REQD("bind-to-socket");
|
||||
orte_show_help("help-odls-default.txt",
|
||||
"odls-default:invalid-phys-cpu", true);
|
||||
ORTE_ODLS_ERROR_OUT(ORTE_ERR_FATAL);
|
||||
}
|
||||
/* are we bound? */
|
||||
if (orte_odls_globals.bound) {
|
||||
/* see if this physical cpu is available to us */
|
||||
if (!OPAL_PAFFINITY_CPU_ISSET(phys_cpu, orte_odls_globals.my_cores)) {
|
||||
/* no it isn't - skip it */
|
||||
continue;
|
||||
}
|
||||
}
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_odls_globals.output,
|
||||
"%s odls:default:fork mapping phys socket %d core %d to phys_cpu %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
target_socket, phys_core, phys_cpu));
|
||||
OPAL_PAFFINITY_CPU_SET(phys_cpu, mask);
|
||||
}
|
||||
} else {
|
||||
/* my starting core has to be offset by cpus_per_rank */
|
||||
logical_cpu = nrank * jobdat->cpus_per_rank;
|
||||
for (n=0; n < jobdat->cpus_per_rank; n++) {
|
||||
/* are we bound? */
|
||||
if (orte_odls_globals.bound) {
|
||||
/* if we are bound, then use the logical_cpu as an index
|
||||
* against our available cores
|
||||
*/
|
||||
ncpu = 0;
|
||||
for (i=0; i < orte_odls_globals.num_processors && ncpu <= logical_cpu; i++) {
|
||||
if (OPAL_PAFFINITY_CPU_ISSET(i, orte_odls_globals.my_cores)) {
|
||||
ncpu++;
|
||||
phys_cpu = i;
|
||||
}
|
||||
}
|
||||
/* if we don't have enough processors, that is an error */
|
||||
if (ncpu < logical_cpu) {
|
||||
ORTE_ODLS_IF_BIND_NOT_REQD("bind-to-core");
|
||||
orte_show_help("help-odls-default.txt",
|
||||
"odls-default:not-enough-resources", true,
|
||||
"processors", orte_process_info.nodename,
|
||||
"bind-to-core", context->app);
|
||||
ORTE_ODLS_ERROR_OUT(ORTE_ERR_FATAL);
|
||||
}
|
||||
} else {
|
||||
/* if we are not bound, then all processors are available
|
||||
* to us, so index into the node's array to get the
|
||||
* physical cpu
|
||||
*/
|
||||
phys_cpu = opal_paffinity_base_get_physical_processor_id(logical_cpu);
|
||||
if (0 > phys_cpu) {
|
||||
ORTE_ODLS_IF_BIND_NOT_REQD("bind-to-core");
|
||||
orte_show_help("help-odls-default.txt",
|
||||
"odls-default:invalid-phys-cpu", true);
|
||||
ORTE_ODLS_ERROR_OUT(ORTE_ERR_FATAL);
|
||||
}
|
||||
}
|
||||
}
|
||||
OPAL_PAFFINITY_CPU_SET(phys_cpu, mask);
|
||||
logical_cpu += jobdat->stride;
|
||||
@ -448,7 +499,9 @@ static int odls_default_fork_local_proc(orte_app_context_t* context,
|
||||
if (n < logical_skt) {
|
||||
ORTE_ODLS_IF_BIND_NOT_REQD("bind-to-socket");
|
||||
orte_show_help("help-odls-default.txt",
|
||||
"odls-default:not-enough-sockets", true);
|
||||
"odls-default:not-enough-resources", true,
|
||||
"sockets", orte_process_info.nodename,
|
||||
"bind-to-socket", context->app);
|
||||
ORTE_ODLS_ERROR_OUT(ORTE_ERR_FATAL);
|
||||
}
|
||||
} else {
|
||||
@ -485,7 +538,7 @@ static int odls_default_fork_local_proc(orte_app_context_t* context,
|
||||
true, orte_process_info.nodename, "bind-to-socket", "",
|
||||
context->app);
|
||||
ORTE_ODLS_ERROR_OUT(ORTE_ERR_FATAL);
|
||||
}
|
||||
}
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_odls_globals.output,
|
||||
"bysocket lrank %d numsocks %d logical socket %d target socket %d", (int)lrank,
|
||||
(int)orte_odls_globals.num_sockets,
|
||||
@ -515,7 +568,9 @@ static int odls_default_fork_local_proc(orte_app_context_t* context,
|
||||
if (ncpu < logical_cpu) {
|
||||
ORTE_ODLS_IF_BIND_NOT_REQD("bind-to-socket");
|
||||
orte_show_help("help-odls-default.txt",
|
||||
"odls-default:not-enough-processors", true);
|
||||
"odls-default:not-enough-resources", true,
|
||||
"processors", orte_process_info.nodename,
|
||||
"bind-to-socket", context->app);
|
||||
ORTE_ODLS_ERROR_OUT(ORTE_ERR_FATAL);
|
||||
}
|
||||
/* get the physical socket of that cpu */
|
||||
@ -524,7 +579,9 @@ static int odls_default_fork_local_proc(orte_app_context_t* context,
|
||||
goto LAUNCH_PROCS;
|
||||
}
|
||||
orte_show_help("help-odls-default.txt",
|
||||
"odls-default:not-enough-sockets", true);
|
||||
"odls-default:topo-not-supported",
|
||||
true, orte_process_info.nodename, "bind-to-socket", "",
|
||||
context->app);
|
||||
ORTE_ODLS_ERROR_OUT(ORTE_ERR_FATAL);
|
||||
}
|
||||
} else {
|
||||
|
@ -138,8 +138,8 @@ int orte_rmaps_base_open(void)
|
||||
false, false, -1, &orte_rmaps_base.npersocket);
|
||||
if (0 < orte_rmaps_base.npersocket) {
|
||||
ORTE_ADD_MAPPING_POLICY(ORTE_MAPPING_NPERXXX);
|
||||
/* bind to socket, UNLESS the user already specified something else */
|
||||
ORTE_XSET_BINDING_POLICY(ORTE_BIND_TO_SOCKET);
|
||||
/* force bind to socket */
|
||||
ORTE_SET_BINDING_POLICY(ORTE_BIND_TO_SOCKET);
|
||||
}
|
||||
|
||||
/* Do we want to loadbalance the job */
|
||||
|
@ -33,6 +33,7 @@
|
||||
#include "opal/util/argv.h"
|
||||
|
||||
#include "orte/util/proc_info.h"
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
|
||||
#include "orte/runtime/runtime.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
@ -348,19 +349,15 @@ int orte_register_params(void)
|
||||
"Number of processor boards/node (1-256) [default: 1]",
|
||||
false, false, 1, &value);
|
||||
orte_default_num_boards = (uint8_t)value;
|
||||
if (OPAL_SUCCESS != opal_paffinity_base_get_socket_info(&value)) {
|
||||
value = 1;
|
||||
}
|
||||
|
||||
mca_base_param_reg_int_name("orte", "num_sockets",
|
||||
"Number of sockets/board (1-256) [default: auto-sensed by mpirun or 1]",
|
||||
false, false, value, &value);
|
||||
"Number of sockets/board (1-256)",
|
||||
false, false, 0, &value);
|
||||
orte_default_num_sockets_per_board = (uint8_t)value;
|
||||
if (OPAL_SUCCESS != opal_paffinity_base_get_core_info(0, &value)) {
|
||||
value = 1;
|
||||
}
|
||||
|
||||
mca_base_param_reg_int_name("orte", "num_cores",
|
||||
"Number of cores/socket (1-256) [default: auto-sensed by mpirun or 1]",
|
||||
false, false, value, &value);
|
||||
"Number of cores/socket (1-256)",
|
||||
false, false, 0, &value);
|
||||
orte_default_num_cores_per_socket = (uint8_t)value;
|
||||
|
||||
/* cpu allocation specification */
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user