Per the discussion on the devel list, move the binding of processes to processors from MPI_Init to process start. This involves:
1. replacing mpi_paffinity_alone with opal_paffinity_alone - for back-compatibility, I have aliased mpi_paffinity_alone to the new param name. This causes a mild abstraction break in the opal/mca/paffinity framework - per the devel discussion...live with it. :-) I also moved the ompi_xxx global variable that tracked maffinity setup so it could be properly closed in MPI_Finalize to the opal/mca/maffinity framework to avoid an abstraction break. 2. Added code to the odls/default module to perform paffinity binding and maffinity init between process fork and exec. This has been tested on IU's odin cluster and works for both MPI and non-MPI apps. 3. Revise MPI_Init to detect if affinity has already been set, and to attempt to set it if not already done. I have *not* tested this as I haven't yet figured out a way to do so - I couldn't get slurm to perform cpu bindings, even though it supposedly does do so. This has only been lightly tested and would definitely benefit from a wider range of evaluation... This commit was SVN r21209.
Этот коммит содержится в:
родитель
fa839f4a30
Коммит
d396f0a6fc
@ -38,7 +38,7 @@ WARNING: Cannot set both the MCA parameters mpi_leave_pinned and
|
||||
mpi_leave_pinned_pipeline to "true". Defaulting to mpi_leave_pinned
|
||||
ONLY.
|
||||
[mpi_init:startup:paffinity-unavailable]
|
||||
The MCA parameter "mpi_paffinity_alone" was set to a nonzero value,
|
||||
The MCA parameter "opal_paffinity_alone" was set to a nonzero value,
|
||||
but Open MPI was unable to bind MPI_COMM_WORLD rank %s to a processor.
|
||||
|
||||
Typical causes for this problem include:
|
||||
|
@ -57,10 +57,6 @@ OMPI_DECLSPEC extern int ompi_mpi_thread_provided;
|
||||
/** Identifier of the main thread */
|
||||
OMPI_DECLSPEC extern struct opal_thread_t *ompi_mpi_main_thread;
|
||||
|
||||
/** Did we setup maffinity in MPI_INIT (and therefore need to shut
|
||||
it down during MPI_FINALIZE)? */
|
||||
OMPI_DECLSPEC extern bool ompi_mpi_maffinity_setup;
|
||||
|
||||
/** Do we want to be warned on fork or not? */
|
||||
OMPI_DECLSPEC extern bool ompi_warn_on_fork;
|
||||
|
||||
|
@ -144,7 +144,7 @@ int ompi_mpi_finalize(void)
|
||||
opal_progress_event_users_increment();
|
||||
|
||||
/* If maffinity was setup, tear it down */
|
||||
if (ompi_mpi_maffinity_setup) {
|
||||
if (opal_maffinity_setup) {
|
||||
opal_maffinity_base_close();
|
||||
}
|
||||
|
||||
|
@ -280,6 +280,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
|
||||
int param, value;
|
||||
struct timeval ompistart, ompistop;
|
||||
char *event_val = NULL;
|
||||
opal_paffinity_base_cpu_set_t mask;
|
||||
#if 0
|
||||
/* see comment below about sched_yield */
|
||||
int num_processors;
|
||||
@ -395,56 +396,60 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
|
||||
orte_process_info.pid);
|
||||
}
|
||||
|
||||
/* Setup process affinity. First check to see if a slot list was
|
||||
specified. If so, use it. If no slot list was specified,
|
||||
that's not an error -- just fall through and try the next
|
||||
paffinity scheme. */
|
||||
ret = opal_paffinity_base_slot_list_set((long)ORTE_PROC_MY_NAME->vpid);
|
||||
if (OPAL_SUCCESS == ret) {
|
||||
paffinity_enabled = true;
|
||||
}
|
||||
/* If an error occurred in the slot list setup (other than "there
|
||||
was not slot list specified"), bail. */
|
||||
else if (OPAL_ERR_NOT_FOUND != ret) {
|
||||
error = "opal_paffinity_base_slot_list_set() returned an error";
|
||||
goto error;
|
||||
}
|
||||
/* It's an error if multiple paffinity schemes were specified */
|
||||
if (paffinity_enabled && ompi_mpi_paffinity_alone) {
|
||||
ret = OMPI_ERR_BAD_PARAM;
|
||||
error = "Multiple processor affinity schemes specified (can only specify one)";
|
||||
goto error;
|
||||
}
|
||||
/* Otherwise, if mpi_paffinity_alone was set, use that scheme */
|
||||
else if (ompi_mpi_paffinity_alone) {
|
||||
opal_paffinity_base_cpu_set_t mask;
|
||||
int phys_cpu;
|
||||
orte_node_rank_t nrank;
|
||||
if (ORTE_NODE_RANK_INVALID == (nrank = orte_ess.get_node_rank(ORTE_PROC_MY_NAME))) {
|
||||
error = "Could not get node rank - cannot set processor affinity";
|
||||
goto error;
|
||||
/* if it hasn't already been done, setup process affinity.
|
||||
* First check to see if a slot list was
|
||||
* specified. If so, use it. If no slot list was specified,
|
||||
* that's not an error -- just fall through and try the next
|
||||
* paffinity scheme.
|
||||
*/
|
||||
ret = opal_paffinity_base_get(&mask);
|
||||
if (OPAL_ERR_NOT_FOUND == ret) {
|
||||
/* the system is capable of doing processor affinity, but it
|
||||
* has not yet been set - see if a slot_list was given
|
||||
*/
|
||||
if (NULL != opal_paffinity_base_slot_list) {
|
||||
/* It's an error if multiple paffinity schemes were specified */
|
||||
if (opal_paffinity_alone) {
|
||||
ret = OMPI_ERR_BAD_PARAM;
|
||||
error = "Multiple processor affinity schemes specified (can only specify one)";
|
||||
goto error;
|
||||
}
|
||||
ret = opal_paffinity_base_slot_list_set((long)ORTE_PROC_MY_NAME->vpid, opal_paffinity_base_slot_list);
|
||||
if (OPAL_ERR_NOT_FOUND != ret) {
|
||||
error = "opal_paffinity_base_slot_list_set() returned an error";
|
||||
goto error;
|
||||
}
|
||||
paffinity_enabled = true;
|
||||
} else if (opal_paffinity_alone) {
|
||||
/* no slot_list, but they asked for paffinity */
|
||||
int phys_cpu;
|
||||
orte_node_rank_t nrank;
|
||||
if (ORTE_NODE_RANK_INVALID == (nrank = orte_ess.get_node_rank(ORTE_PROC_MY_NAME))) {
|
||||
error = "Could not get node rank - cannot set processor affinity";
|
||||
goto error;
|
||||
}
|
||||
OPAL_PAFFINITY_CPU_ZERO(mask);
|
||||
phys_cpu = opal_paffinity_base_get_physical_processor_id(nrank);
|
||||
if (0 > phys_cpu) {
|
||||
error = "Could not get physical processor id - cannot set processor affinity";
|
||||
goto error;
|
||||
}
|
||||
OPAL_PAFFINITY_CPU_SET(phys_cpu, mask);
|
||||
ret = opal_paffinity_base_set(mask);
|
||||
if (OPAL_SUCCESS != ret) {
|
||||
error = "Setting processor affinity failed";
|
||||
goto error;
|
||||
}
|
||||
paffinity_enabled = true;
|
||||
}
|
||||
OPAL_PAFFINITY_CPU_ZERO(mask);
|
||||
phys_cpu = opal_paffinity_base_get_physical_processor_id(nrank);
|
||||
if (0 > phys_cpu) {
|
||||
error = "Could not get physical processor id - cannot set processor affinity";
|
||||
goto error;
|
||||
}
|
||||
OPAL_PAFFINITY_CPU_SET(phys_cpu, mask);
|
||||
ret = opal_paffinity_base_set(mask);
|
||||
if (OPAL_SUCCESS != ret) {
|
||||
error = "Setting processor affinity failed";
|
||||
goto error;
|
||||
}
|
||||
paffinity_enabled = true;
|
||||
}
|
||||
|
||||
/* If we were able to set processor affinity, try setting up
|
||||
memory affinity */
|
||||
if (paffinity_enabled) {
|
||||
if (!opal_maffinity_setup && paffinity_enabled) {
|
||||
if (OPAL_SUCCESS == opal_maffinity_base_open() &&
|
||||
OPAL_SUCCESS == opal_maffinity_base_select()) {
|
||||
ompi_mpi_maffinity_setup = true;
|
||||
opal_maffinity_setup = true;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -52,7 +52,6 @@ int ompi_debug_show_mpi_alloc_mem_leaks = 0;
|
||||
bool ompi_debug_no_free_handles = false;
|
||||
bool ompi_mpi_show_mca_params = false;
|
||||
char *ompi_mpi_show_mca_params_file = NULL;
|
||||
bool ompi_mpi_paffinity_alone = false;
|
||||
bool ompi_mpi_abort_print_stack = false;
|
||||
int ompi_mpi_abort_delay = 0;
|
||||
bool ompi_mpi_keep_peer_hostnames = true;
|
||||
@ -261,12 +260,6 @@ int ompi_mpi_register_params(void)
|
||||
true);
|
||||
}
|
||||
|
||||
mca_base_param_reg_int_name("mpi", "paffinity_alone",
|
||||
"If nonzero, assume that this job is the only (set of) process(es) running on each node and bind processes to processors, starting with processor ID 0",
|
||||
false, false,
|
||||
(int) ompi_mpi_paffinity_alone, &value);
|
||||
ompi_mpi_paffinity_alone = OPAL_INT_TO_BOOL(value);
|
||||
|
||||
mca_base_param_reg_int_name("mpi", "warn_on_fork",
|
||||
"If nonzero, issue a warning if program forks under conditions that could cause system errors",
|
||||
false, false,
|
||||
|
@ -95,15 +95,6 @@ OMPI_DECLSPEC extern bool ompi_mpi_show_mca_params;
|
||||
*/
|
||||
OMPI_DECLSPEC extern char * ompi_mpi_show_mca_params_file;
|
||||
|
||||
/**
|
||||
* If this value is true, assume that this ORTE job is the only job
|
||||
* running on the nodes that have been allocated to it, and bind
|
||||
* processes to the processor ID corresponding to their node local
|
||||
* rank (if you COMM_SPAWN on to empty processors on the same node,
|
||||
* the NLR will start at N, not 0).
|
||||
*/
|
||||
OMPI_DECLSPEC extern bool ompi_mpi_paffinity_alone;
|
||||
|
||||
/**
|
||||
* Whether we should keep the string hostnames of all the MPI
|
||||
* process peers around or not (eats up a good bit of memory).
|
||||
|
@ -154,7 +154,12 @@ OPAL_DECLSPEC extern opal_list_t opal_maffinity_base_components_opened;
|
||||
* Debugging output stream
|
||||
*/
|
||||
extern int opal_maffinity_base_output;
|
||||
|
||||
|
||||
/**
|
||||
* Flag to indicate whether or not maffinity was setup
|
||||
*/
|
||||
OPAL_DECLSPEC extern bool opal_maffinity_setup;
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif /* OPAL_BASE_MAFFINITY_H */
|
||||
|
@ -42,7 +42,7 @@
|
||||
int opal_maffinity_base_output = -1;
|
||||
bool opal_maffinity_base_components_opened_valid = false;
|
||||
opal_list_t opal_maffinity_base_components_opened;
|
||||
|
||||
bool opal_maffinity_setup = false;
|
||||
|
||||
/*
|
||||
* Function for finding and opening either all MCA components, or the one
|
||||
|
@ -28,36 +28,32 @@
|
||||
|
||||
#include <sys/types.h>
|
||||
|
||||
#if defined(c_plusplus) || defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
BEGIN_C_DECLS
|
||||
|
||||
/**
|
||||
* Struct used with opal_maffinity_base_module_set_fn_t. It
|
||||
* describes a section of memory (starting address and length).
|
||||
* This is really the same thing as an iovec, but we include a
|
||||
* separate type for it for at least 2 reasons:
|
||||
*
|
||||
* 1. Some OS's iovec definitions are exceedingly lame (e.g.,
|
||||
* Solaris 9 has the length argument as an int, instead of a
|
||||
* size_t).
|
||||
*
|
||||
* 2. We reserve the right to expand/change this struct in the
|
||||
* future.
|
||||
*/
|
||||
struct opal_maffinity_base_segment_t {
|
||||
/** Starting address of segment */
|
||||
void *mbs_start_addr;
|
||||
/** Length of segment */
|
||||
size_t mbs_len;
|
||||
};
|
||||
/**
|
||||
* Convenience typedef
|
||||
*/
|
||||
typedef struct opal_maffinity_base_segment_t opal_maffinity_base_segment_t;
|
||||
/**
|
||||
* Struct used with opal_maffinity_base_module_set_fn_t. It
|
||||
* describes a section of memory (starting address and length).
|
||||
* This is really the same thing as an iovec, but we include a
|
||||
* separate type for it for at least 2 reasons:
|
||||
*
|
||||
* 1. Some OS's iovec definitions are exceedingly lame (e.g.,
|
||||
* Solaris 9 has the length argument as an int, instead of a
|
||||
* size_t).
|
||||
*
|
||||
* 2. We reserve the right to expand/change this struct in the
|
||||
* future.
|
||||
*/
|
||||
struct opal_maffinity_base_segment_t {
|
||||
/** Starting address of segment */
|
||||
void *mbs_start_addr;
|
||||
/** Length of segment */
|
||||
size_t mbs_len;
|
||||
};
|
||||
/**
|
||||
* Convenience typedef
|
||||
*/
|
||||
typedef struct opal_maffinity_base_segment_t opal_maffinity_base_segment_t;
|
||||
|
||||
#if defined(c_plusplus) || defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
END_C_DECLS
|
||||
|
||||
#endif /* OPAL_MAFFINITY_TYPES_H */
|
||||
|
@ -237,13 +237,20 @@ OPAL_DECLSPEC extern opal_list_t opal_paffinity_base_components_opened;
|
||||
/**
|
||||
* Assigning slot_list to process
|
||||
*/
|
||||
OPAL_DECLSPEC int opal_paffinity_base_slot_list_set(long rank);
|
||||
OPAL_DECLSPEC int opal_paffinity_base_slot_list_set(long rank, char *slot_str);
|
||||
|
||||
/**
|
||||
* Debugging output stream
|
||||
*/
|
||||
OPAL_DECLSPEC extern int opal_paffinity_base_output;
|
||||
|
||||
/**
|
||||
* Flag indicating whether or not processor affinity is to be enabled
|
||||
*/
|
||||
OPAL_DECLSPEC extern bool opal_paffinity_alone;
|
||||
|
||||
OPAL_DECLSPEC extern char *opal_paffinity_base_slot_list;
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif /* OPAL_BASE_PAFFINITY_H */
|
||||
|
@ -43,7 +43,8 @@
|
||||
OPAL_DECLSPEC int opal_paffinity_base_output = -1;
|
||||
bool opal_paffinity_base_components_opened_valid = false;
|
||||
opal_list_t opal_paffinity_base_components_opened;
|
||||
|
||||
bool opal_paffinity_alone = false;
|
||||
char *opal_paffinity_base_slot_list;
|
||||
|
||||
/*
|
||||
* Function for finding and opening either all MCA components, or the one
|
||||
@ -51,7 +52,7 @@ opal_list_t opal_paffinity_base_components_opened;
|
||||
*/
|
||||
int opal_paffinity_base_open(void)
|
||||
{
|
||||
int value;
|
||||
int value, id;
|
||||
|
||||
/* Debugging / verbose output */
|
||||
|
||||
@ -65,11 +66,19 @@ int opal_paffinity_base_open(void)
|
||||
opal_paffinity_base_output = -1;
|
||||
}
|
||||
|
||||
id = mca_base_param_reg_int_name("opal", "paffinity_alone",
|
||||
"If nonzero, assume that this job is the only (set of) process(es) running on each node and bind processes to processors, starting with processor ID 0",
|
||||
false, false,
|
||||
0, NULL);
|
||||
mca_base_param_reg_syn_name(id, "mpi", "paffinity_alone", true);
|
||||
mca_base_param_lookup_int(id, &value);
|
||||
opal_paffinity_alone = OPAL_INT_TO_BOOL(value);
|
||||
|
||||
opal_paffinity_base_components_opened_valid = false;
|
||||
|
||||
mca_base_param_reg_string_name("opal", "paffinity_base_slot_list",
|
||||
"Used to set list of processor IDs to bind MPI processes to (e.g., used in conjunction with rank files)",
|
||||
true, false, NULL, NULL);
|
||||
true, false, NULL, &opal_paffinity_base_slot_list);
|
||||
|
||||
/* Open up all available components */
|
||||
|
||||
|
@ -512,29 +512,22 @@ static int opal_paffinity_base_socket_core_to_cpu_set(char **socket_core_list, i
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
int opal_paffinity_base_slot_list_set(long rank)
|
||||
int opal_paffinity_base_slot_list_set(long rank, char *slot_str)
|
||||
{
|
||||
char *slot_str = NULL;
|
||||
char **item;
|
||||
char **socket_core;
|
||||
int item_cnt, socket_core_cnt, rc;
|
||||
bool logical_map;
|
||||
|
||||
rc = mca_base_param_find("opal", NULL, "paffinity_base_slot_list");
|
||||
/* If there was not slot list specified, return a specific error
|
||||
code indicating that */
|
||||
if (rc <= 0) {
|
||||
return OPAL_ERR_NOT_FOUND;
|
||||
}
|
||||
|
||||
if (OPAL_SUCCESS == mca_base_param_lookup_string(rc, &slot_str)) {
|
||||
if (NULL == slot_str) {
|
||||
return OPAL_ERR_NOT_FOUND;
|
||||
}
|
||||
}
|
||||
if (0 == strcmp("", slot_str)){
|
||||
if (NULL == slot_str){
|
||||
return OPAL_ERR_BAD_PARAM;
|
||||
}
|
||||
|
||||
/* if the slot string is empty, that is an error */
|
||||
if (0 == strlen(slot_str)) {
|
||||
return OPAL_ERR_BAD_PARAM;
|
||||
}
|
||||
|
||||
/* check for diag request to avoid repeatedly doing so */
|
||||
if (4 < opal_output_get_verbosity(opal_paffinity_base_output)) {
|
||||
diag_requested = true;
|
||||
|
@ -1285,16 +1285,6 @@ int orte_odls_base_default_launch_local(orte_jobid_t job,
|
||||
opal_setenv("OMPI_COMM_WORLD_LOCAL_RANK", value, true, &app->env);
|
||||
free(value);
|
||||
|
||||
param = mca_base_param_environ_variable("opal", NULL, "paffinity_base_slot_list");
|
||||
if ( NULL != child->slot_list ) {
|
||||
asprintf(&value, "%s", child->slot_list);
|
||||
opal_setenv(param, value, true, &app->env);
|
||||
free(value);
|
||||
} else {
|
||||
opal_unsetenv(param, &app->env);
|
||||
}
|
||||
free(param);
|
||||
|
||||
/* if we are timing things, record when we are going to launch this proc */
|
||||
if (orte_timing) {
|
||||
gettimeofday(&child->starttime, NULL);
|
||||
|
@ -55,3 +55,36 @@ Max value allowed: %ld
|
||||
This may be resolved by increasing the number of available node id's by
|
||||
re-configuring Open MPI with the --enable-jumbo-clusters option, and then
|
||||
re-running the application
|
||||
#
|
||||
[odls-default:multiple-paffinity-schemes]
|
||||
Multiple processor affinity schemes were specified (can only specify one):
|
||||
|
||||
Slot list: %s
|
||||
opal_paffinity_alone: true
|
||||
|
||||
Please specify only the one desired method.
|
||||
#
|
||||
[odls-default:slot-list-failed]
|
||||
We were unable to successfully process/set the requested processor
|
||||
affinity settings:
|
||||
|
||||
Specified slot list: %s
|
||||
Error: %s
|
||||
|
||||
This could mean that a non-existent processor was specified, or
|
||||
that the specification had improper syntax.
|
||||
#
|
||||
[odls-default:invalid-node-rank]
|
||||
An invalid node rank was obtained - this is probably something
|
||||
that should be reported to the OMPI developers.
|
||||
#
|
||||
[odls-default:invalid-phys-cpu]
|
||||
An invalid physical processor id was returned when attempting to
|
||||
set processor affinity. This is probably something that should be
|
||||
reported to the OMPI developers - your system may not support
|
||||
this functionality.
|
||||
#
|
||||
[odls-default:failed-set-paff]
|
||||
An attempt to set processor affinity has failed - please check to
|
||||
ensure that your system supports such functionality. If so, then
|
||||
this is probably something that should be reported to the OMPI developers.
|
||||
|
@ -67,10 +67,14 @@
|
||||
#endif
|
||||
#endif /* HAVE_SCHED_YIELD */
|
||||
|
||||
#include "opal/mca/maffinity/base/base.h"
|
||||
#include "opal/mca/paffinity/base/base.h"
|
||||
|
||||
#include "orte/util/show_help.h"
|
||||
#include "orte/runtime/orte_wait.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "orte/mca/ess/ess.h"
|
||||
#include "orte/mca/iof/base/iof_base_setup.h"
|
||||
#include "orte/util/name_fns.h"
|
||||
|
||||
@ -178,6 +182,7 @@ static int odls_default_fork_local_proc(orte_app_context_t* context,
|
||||
sigset_t sigs;
|
||||
int i, p[2];
|
||||
pid_t pid;
|
||||
bool paffinity_enabled = false;
|
||||
|
||||
if (NULL != child) {
|
||||
/* should pull this information from MPIRUN instead of going with
|
||||
@ -259,7 +264,75 @@ static int odls_default_fork_local_proc(orte_app_context_t* context,
|
||||
exit(1);
|
||||
}
|
||||
|
||||
|
||||
/* Setup process affinity. First check to see if a slot list was
|
||||
* specified. If so, use it. If no slot list was specified,
|
||||
* that's not an error -- just fall through and try the next
|
||||
* paffinity scheme.
|
||||
*/
|
||||
if (NULL != child->slot_list) {
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_odls_globals.output,
|
||||
"%s odls:default:fork got slot_list %s for child %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
child->slot_list, ORTE_NAME_PRINT(child->name)));
|
||||
if (opal_paffinity_alone) {
|
||||
/* It's an error if multiple paffinity schemes were specified */
|
||||
orte_show_help("help-odls-default.txt",
|
||||
"odls-default:multiple-paffinity-schemes", true, child->slot_list);
|
||||
rc = ORTE_ERR_FATAL;
|
||||
write(p[1], &rc, sizeof(int));
|
||||
exit(1);
|
||||
}
|
||||
if (OPAL_SUCCESS != (rc = opal_paffinity_base_slot_list_set((long)child->name->vpid, child->slot_list))) {
|
||||
orte_show_help("help-odls-default.txt",
|
||||
"odls-default:slot-list-failed", true, child->slot_list, ORTE_ERROR_NAME(rc));
|
||||
write(p[1], &rc, sizeof(int));
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
/* Otherwise, if opal_paffinity_alone was set, use that scheme */
|
||||
else if (opal_paffinity_alone) {
|
||||
opal_paffinity_base_cpu_set_t mask;
|
||||
int phys_cpu;
|
||||
orte_node_rank_t nrank;
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_odls_globals.output,
|
||||
"%s odls:default:fork setting paffinity for child %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(child->name)));
|
||||
if (ORTE_NODE_RANK_INVALID == (nrank = orte_ess.get_node_rank(child->name))) {
|
||||
orte_show_help("help-odls-default.txt",
|
||||
"odls-default:invalid-node-rank", true);
|
||||
rc = ORTE_ERR_FATAL;
|
||||
write(p[1], &rc, sizeof(int));
|
||||
exit(1);
|
||||
}
|
||||
OPAL_PAFFINITY_CPU_ZERO(mask);
|
||||
phys_cpu = opal_paffinity_base_get_physical_processor_id(nrank);
|
||||
if (0 > phys_cpu) {
|
||||
orte_show_help("help-odls-default.txt",
|
||||
"odls-default:invalid-phys-cpu", true);
|
||||
rc = ORTE_ERR_FATAL;
|
||||
write(p[1], &rc, sizeof(int));
|
||||
exit(1);
|
||||
}
|
||||
OPAL_PAFFINITY_CPU_SET(phys_cpu, mask);
|
||||
if (OPAL_SUCCESS != (rc = opal_paffinity_base_set(mask))) {
|
||||
orte_show_help("help-odls-default.txt",
|
||||
"odls-default:failed-set-paff", true);
|
||||
write(p[1], &rc, sizeof(int));
|
||||
exit(1);
|
||||
}
|
||||
paffinity_enabled = true;
|
||||
}
|
||||
/* If we were able to set processor affinity, try setting up
|
||||
* memory affinity
|
||||
*/
|
||||
if (paffinity_enabled) {
|
||||
if (OPAL_SUCCESS == opal_maffinity_base_open() &&
|
||||
OPAL_SUCCESS == opal_maffinity_base_select()) {
|
||||
opal_maffinity_setup = true;
|
||||
}
|
||||
}
|
||||
|
||||
} else if (!(ORTE_JOB_CONTROL_FORWARD_OUTPUT & controls)) {
|
||||
/* tie stdin/out/err/internal to /dev/null */
|
||||
int fdnull;
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user