1
1

Some (hopefully final!) adjustments and corrections to the paffinity support:

1. default -npersocket to force -bind-to-socket

2. if we cannot get a value for cores/socket, try using the number of logical cpus instead; if that also fails, default to 1 core

3. add missing error message for not-enough-processors (now provided by a generic not-enough-resources help message that is also used for sockets)

4. since we no longer loop through orte_register_params twice, move the auto-detection of
   topology info out of orte_register_params and into the rte_init for hnp and std_orted

5. fix bind-to-core, bysocket combination

This commit was SVN r21992.
Этот коммит содержится в:
Ralph Castain 2009-09-22 15:41:03 +00:00
родитель b91e7ba91f
Коммит 8da3aa8d5c
6 изменённых файлов: 171 добавлений и 62 удалений

Просмотреть файл

@ -35,6 +35,7 @@
#include "opal/runtime/opal.h"
#include "opal/runtime/opal_cr.h"
#include "opal/mca/pstat/base/base.h"
#include "opal/mca/paffinity/base/base.h"
#include "orte/mca/rml/base/base.h"
#include "orte/mca/routed/base/base.h"
@ -77,11 +78,33 @@ int orte_ess_base_orted_setup(char **hosts)
int ret;
char *error = NULL;
char *plm_to_use;
int value;
/* initialize the global list of local children and job data */
OBJ_CONSTRUCT(&orte_local_children, opal_list_t);
OBJ_CONSTRUCT(&orte_local_jobdata, opal_list_t);
/* determine the topology info */
if (0 == orte_default_num_sockets_per_board) {
/* we weren't given a number, so try to determine it */
if (OPAL_SUCCESS != opal_paffinity_base_get_socket_info(&value)) {
/* can't get any info - default to 1 */
value = 1;
}
orte_default_num_sockets_per_board = (uint8_t)value;
}
if (0 == orte_default_num_cores_per_socket) {
/* we weren't given a number, so try to determine it */
if (OPAL_SUCCESS != opal_paffinity_base_get_core_info(0, &value)) {
/* don't have topo info - can we at least get #processors? */
if (OPAL_SUCCESS != opal_paffinity_base_get_processor_info(&value)) {
/* can't get any info - default to 1 */
value = 1;
}
}
orte_default_num_cores_per_socket = (uint8_t)value;
}
/* open and setup the opal_pstat framework so we can provide
* process stats if requested
*/

Просмотреть файл

@ -38,7 +38,7 @@
#include "opal/util/malloc.h"
#include "opal/util/basename.h"
#include "opal/mca/pstat/base/base.h"
#include "opal/mca/paffinity/paffinity.h"
#include "opal/mca/paffinity/base/base.h"
#include "orte/util/show_help.h"
#include "orte/mca/rml/base/base.h"
@ -116,6 +116,7 @@ static int rte_init(void)
orte_job_t *jdata;
orte_node_t *node;
orte_proc_t *proc;
int value;
/* initialize the global list of local children and job data */
OBJ_CONSTRUCT(&orte_local_children, opal_list_t);
@ -127,6 +128,27 @@ static int rte_init(void)
goto error;
}
/* determine the topology info */
if (0 == orte_default_num_sockets_per_board) {
/* we weren't given a number, so try to determine it */
if (OPAL_SUCCESS != opal_paffinity_base_get_socket_info(&value)) {
/* can't get any info - default to 1 */
value = 1;
}
orte_default_num_sockets_per_board = (uint8_t)value;
}
if (0 == orte_default_num_cores_per_socket) {
/* we weren't given a number, so try to determine it */
if (OPAL_SUCCESS != (ret = opal_paffinity_base_get_core_info(0, &value))) {
/* don't have topo info - can we at least get #processors? */
if (OPAL_SUCCESS != opal_paffinity_base_get_processor_info(&value)) {
/* can't get any info - default to 1 */
value = 1;
}
}
orte_default_num_cores_per_socket = (uint8_t)value;
}
/* if we are using xml for output, put an mpirun start tag */
if (orte_xml_output) {
fprintf(orte_xml_fp, "<mpirun>\n");

Просмотреть файл

@ -120,3 +120,13 @@ Because the request was made on an "if-available" basis, the job was
launched without taking the requested action. If this is not the desired
behavior, talk to your local system administrator to find out if your
system can support the requested action.
#
[odls-default:not-enough-resources]
Not enough %s were found on the local host to meet the requested
binding action:
Local host: %s
Action requested: %s
Application name: %s
Please revise the request and try again.

Просмотреть файл

@ -343,56 +343,107 @@ static int odls_default_fork_local_proc(orte_app_context_t* context,
ORTE_ODLS_ERROR_OUT(ORTE_ERR_FATAL);
}
OPAL_PAFFINITY_CPU_ZERO(mask);
if (ORTE_MAPPING_NPERXXX & jobdat->policy) {
/* if npersocket was set, then we divide the number of cores
* per socket by the #localprocs/#sockets to determine how many cores
* each rank gets
if (ORTE_MAPPING_BYSOCKET & jobdat->policy) {
/* this corresponds to a mapping policy where
* local rank 0 goes on socket 0, and local
* rank 1 goes on socket 1, etc. - round robin
* until all ranks are mapped
*
* NOTE: we already know our number of sockets
* from when we initialized
*/
npersocket = jobdat->num_local_procs / orte_odls_globals.num_sockets;
/* compute the #cores/process */
jobdat->cpus_per_rank = orte_default_num_cores_per_socket / npersocket;
/* figure out which logical cpu this node rank should start on as we
* must ensure it starts on the right socket
*/
logical_cpu = (nrank / npersocket) * orte_default_num_cores_per_socket;
/* now add an offset within the socket */
logical_cpu += (nrank % npersocket) * jobdat->cpus_per_rank;
} else {
/* my starting core has to be offset by cpus_per_rank */
logical_cpu = nrank * jobdat->cpus_per_rank;
}
for (n=0; n < jobdat->cpus_per_rank; n++) {
/* are we bound? */
if (orte_odls_globals.bound) {
/* if we are bound, then use the logical_cpu as an index
* against our available cores
*/
ncpu = 0;
for (i=0; i < orte_odls_globals.num_processors && ncpu <= logical_cpu; i++) {
if (OPAL_PAFFINITY_CPU_ISSET(i, orte_odls_globals.my_cores)) {
ncpu++;
phys_cpu = i;
}
}
/* if we don't have enough processors, that is an error */
if (ncpu < logical_cpu) {
ORTE_ODLS_IF_BIND_NOT_REQD("bind-to-core");
orte_show_help("help-odls-default.txt",
"odls-default:not-enough-processors", true);
ORTE_ODLS_ERROR_OUT(ORTE_ERR_FATAL);
}
} else {
/* if we are not bound, then all processors are available
* to us, so index into the node's array to get the
* physical cpu
*/
phys_cpu = opal_paffinity_base_get_physical_processor_id(logical_cpu);
if (0 > phys_cpu) {
ORTE_ODLS_IF_BIND_NOT_REQD("bind-to-core");
if (ORTE_LOCAL_RANK_INVALID == (lrank = orte_ess.get_local_rank(child->name))) {
orte_show_help("help-odls-default.txt",
"odls-default:invalid-local-rank", true);
ORTE_ODLS_ERROR_OUT(ORTE_ERR_FATAL);
}
target_socket = opal_paffinity_base_get_physical_socket_id(lrank % orte_odls_globals.num_sockets);
if (ORTE_ERR_NOT_SUPPORTED == target_socket) {
/* OS does not support providing topology information */
ORTE_ODLS_IF_BIND_NOT_REQD("bind-to-core");
orte_show_help("help-odls-default.txt",
"odls-default:topo-not-supported",
true, orte_process_info.nodename, "bind-to-core", "",
context->app);
ORTE_ODLS_ERROR_OUT(ORTE_ERR_FATAL);
}
OPAL_OUTPUT_VERBOSE((2, orte_odls_globals.output,
"bysocket lrank %d numsocks %d logical socket %d target socket %d", (int)lrank,
(int)orte_odls_globals.num_sockets,
(int)(lrank % orte_odls_globals.num_sockets),
target_socket));
/* my starting core within this socket has to be offset by cpus_per_rank */
logical_cpu = (lrank / orte_odls_globals.num_sockets) * jobdat->cpus_per_rank;
/* cycle across the cpus_per_rank */
for (n=0; n < jobdat->cpus_per_rank; n++) {
ncpu = logical_cpu + (n * jobdat->stride);
/* get the physical core within this target socket */
phys_core = opal_paffinity_base_get_physical_core_id(target_socket, ncpu);
if (0 > phys_core) {
ORTE_ODLS_IF_BIND_NOT_REQD("bind-to-socket");
orte_show_help("help-odls-default.txt",
"odls-default:invalid-phys-cpu", true);
ORTE_ODLS_ERROR_OUT(ORTE_ERR_FATAL);
}
/* map this to a physical cpu on this node */
if (ORTE_SUCCESS != opal_paffinity_base_get_map_to_processor_id(target_socket, phys_core, &phys_cpu)) {
ORTE_ODLS_IF_BIND_NOT_REQD("bind-to-socket");
orte_show_help("help-odls-default.txt",
"odls-default:invalid-phys-cpu", true);
ORTE_ODLS_ERROR_OUT(ORTE_ERR_FATAL);
}
/* are we bound? */
if (orte_odls_globals.bound) {
/* see if this physical cpu is available to us */
if (!OPAL_PAFFINITY_CPU_ISSET(phys_cpu, orte_odls_globals.my_cores)) {
/* no it isn't - skip it */
continue;
}
}
OPAL_OUTPUT_VERBOSE((2, orte_odls_globals.output,
"%s odls:default:fork mapping phys socket %d core %d to phys_cpu %d",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
target_socket, phys_core, phys_cpu));
OPAL_PAFFINITY_CPU_SET(phys_cpu, mask);
}
} else {
/* my starting core has to be offset by cpus_per_rank */
logical_cpu = nrank * jobdat->cpus_per_rank;
for (n=0; n < jobdat->cpus_per_rank; n++) {
/* are we bound? */
if (orte_odls_globals.bound) {
/* if we are bound, then use the logical_cpu as an index
* against our available cores
*/
ncpu = 0;
for (i=0; i < orte_odls_globals.num_processors && ncpu <= logical_cpu; i++) {
if (OPAL_PAFFINITY_CPU_ISSET(i, orte_odls_globals.my_cores)) {
ncpu++;
phys_cpu = i;
}
}
/* if we don't have enough processors, that is an error */
if (ncpu < logical_cpu) {
ORTE_ODLS_IF_BIND_NOT_REQD("bind-to-core");
orte_show_help("help-odls-default.txt",
"odls-default:not-enough-resources", true,
"processors", orte_process_info.nodename,
"bind-to-core", context->app);
ORTE_ODLS_ERROR_OUT(ORTE_ERR_FATAL);
}
} else {
/* if we are not bound, then all processors are available
* to us, so index into the node's array to get the
* physical cpu
*/
phys_cpu = opal_paffinity_base_get_physical_processor_id(logical_cpu);
if (0 > phys_cpu) {
ORTE_ODLS_IF_BIND_NOT_REQD("bind-to-core");
orte_show_help("help-odls-default.txt",
"odls-default:invalid-phys-cpu", true);
ORTE_ODLS_ERROR_OUT(ORTE_ERR_FATAL);
}
}
}
OPAL_PAFFINITY_CPU_SET(phys_cpu, mask);
logical_cpu += jobdat->stride;
@ -448,7 +499,9 @@ static int odls_default_fork_local_proc(orte_app_context_t* context,
if (n < logical_skt) {
ORTE_ODLS_IF_BIND_NOT_REQD("bind-to-socket");
orte_show_help("help-odls-default.txt",
"odls-default:not-enough-sockets", true);
"odls-default:not-enough-resources", true,
"sockets", orte_process_info.nodename,
"bind-to-socket", context->app);
ORTE_ODLS_ERROR_OUT(ORTE_ERR_FATAL);
}
} else {
@ -485,7 +538,7 @@ static int odls_default_fork_local_proc(orte_app_context_t* context,
true, orte_process_info.nodename, "bind-to-socket", "",
context->app);
ORTE_ODLS_ERROR_OUT(ORTE_ERR_FATAL);
}
}
OPAL_OUTPUT_VERBOSE((2, orte_odls_globals.output,
"bysocket lrank %d numsocks %d logical socket %d target socket %d", (int)lrank,
(int)orte_odls_globals.num_sockets,
@ -515,7 +568,9 @@ static int odls_default_fork_local_proc(orte_app_context_t* context,
if (ncpu < logical_cpu) {
ORTE_ODLS_IF_BIND_NOT_REQD("bind-to-socket");
orte_show_help("help-odls-default.txt",
"odls-default:not-enough-processors", true);
"odls-default:not-enough-resources", true,
"processors", orte_process_info.nodename,
"bind-to-socket", context->app);
ORTE_ODLS_ERROR_OUT(ORTE_ERR_FATAL);
}
/* get the physical socket of that cpu */
@ -524,7 +579,9 @@ static int odls_default_fork_local_proc(orte_app_context_t* context,
goto LAUNCH_PROCS;
}
orte_show_help("help-odls-default.txt",
"odls-default:not-enough-sockets", true);
"odls-default:topo-not-supported",
true, orte_process_info.nodename, "bind-to-socket", "",
context->app);
ORTE_ODLS_ERROR_OUT(ORTE_ERR_FATAL);
}
} else {

Просмотреть файл

@ -138,8 +138,8 @@ int orte_rmaps_base_open(void)
false, false, -1, &orte_rmaps_base.npersocket);
if (0 < orte_rmaps_base.npersocket) {
ORTE_ADD_MAPPING_POLICY(ORTE_MAPPING_NPERXXX);
/* bind to socket, UNLESS the user already specified something else */
ORTE_XSET_BINDING_POLICY(ORTE_BIND_TO_SOCKET);
/* force bind to socket */
ORTE_SET_BINDING_POLICY(ORTE_BIND_TO_SOCKET);
}
/* Do we want to loadbalance the job */

Просмотреть файл

@ -33,6 +33,7 @@
#include "opal/util/argv.h"
#include "orte/util/proc_info.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/runtime/runtime.h"
#include "orte/runtime/orte_globals.h"
@ -348,19 +349,15 @@ int orte_register_params(void)
"Number of processor boards/node (1-256) [default: 1]",
false, false, 1, &value);
orte_default_num_boards = (uint8_t)value;
if (OPAL_SUCCESS != opal_paffinity_base_get_socket_info(&value)) {
value = 1;
}
mca_base_param_reg_int_name("orte", "num_sockets",
"Number of sockets/board (1-256) [default: auto-sensed by mpirun or 1]",
false, false, value, &value);
"Number of sockets/board (1-256)",
false, false, 0, &value);
orte_default_num_sockets_per_board = (uint8_t)value;
if (OPAL_SUCCESS != opal_paffinity_base_get_core_info(0, &value)) {
value = 1;
}
mca_base_param_reg_int_name("orte", "num_cores",
"Number of cores/socket (1-256) [default: auto-sensed by mpirun or 1]",
false, false, value, &value);
"Number of cores/socket (1-256)",
false, false, 0, &value);
orte_default_num_cores_per_socket = (uint8_t)value;
/* cpu allocation specification */