Fixes trac:1383:

Short version: remove opal_paffinity_alone and restore mpi_paffinity_alone. ORTE makes various information available for the MPI layer to decide what it wants to do in terms of processor affinity.

Details:

 * Remove the opal_paffinity_alone MCA param; restore the mpi_paffinity_alone MCA param.
 * Move the opal_paffinity_slot_list param registration to the paffinity base.
 * ompi_mpi_init() calls opal_paffinity_base_slot_list_set(); if that succeeds, use that. If no slot list was set, see if mpi_paffinity_alone was set. If so, bind this process to its Node Local Rank (NLR). The NLR is the ORTE-maintained slot ID; if you COMM_SPAWN to a host in this ORTE universe that already has procs on it, the NLR for the new job will start at N (not 0). So this is slightly better than mpi_paffinity_alone in the v1.2 series.
 * If a slot list is specified *and* mpi_paffinity_alone is set, we display an error and abort.
 * Remove the calls from the rmaps/rank_file component that register and look up the opal_paffinity MCA params.
 * Remove the code in orte/odls that set affinities; instead, have it just pass a slot_list if one exists.
 * Clean up the orte/odls code that determined oversubscribed/want_processor, as these were just opposites of each other.

This commit was SVN r18874.

The following Trac tickets were found above:
  Ticket 1383 --> https://svn.open-mpi.org/trac/ompi/ticket/1383
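As a reading aid, here is a condensed sketch of the precedence logic that ompi_mpi_init() now implements. The setup_paffinity() wrapper is hypothetical (in the tree this code is inlined in ompi_mpi_init() and reports errors through its error/goto convention); every call and symbol in the body is taken from the diff below.

    /* Hypothetical wrapper; condensed from the ompi_mpi_init() diff below. */
    static int setup_paffinity(void)
    {
        bool paffinity_enabled = false;
        int ret;

        /* Scheme 1: an explicit slot list, if the user supplied one */
        ret = opal_paffinity_base_slot_list_set((long)ORTE_PROC_MY_NAME->vpid);
        if (OPAL_SUCCESS == ret) {
            paffinity_enabled = true;
        } else if (OPAL_ERR_NOT_FOUND != ret) {
            /* a slot list was given but could not be applied */
            return ret;
        }

        /* Specifying both schemes at once is an error */
        if (paffinity_enabled && ompi_mpi_paffinity_alone) {
            return OMPI_ERR_BAD_PARAM;
        }

        /* Scheme 2: mpi_paffinity_alone -- bind to the node local rank (NLR) */
        if (!paffinity_enabled && ompi_mpi_paffinity_alone) {
            opal_paffinity_base_cpu_set_t mask;
            OPAL_PAFFINITY_CPU_ZERO(mask);
            OPAL_PAFFINITY_CPU_SET(orte_ess.get_node_rank(ORTE_PROC_MY_NAME), mask);
            if (OPAL_SUCCESS != (ret = opal_paffinity_base_set(mask))) {
                return ret;
            }
            paffinity_enabled = true;
        }

        /* Memory affinity is only useful once processor affinity is in place */
        if (paffinity_enabled &&
            OPAL_SUCCESS == opal_maffinity_base_open() &&
            OPAL_SUCCESS == opal_maffinity_base_select()) {
            ompi_mpi_maffinity_setup = true;
        }
        return OMPI_SUCCESS;
    }

Note the ordering: the slot list wins if present, mpi_paffinity_alone is the fallback, and specifying both aborts the job.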
This commit is contained in:
parent 773c92a6eb
commit 583bf425c0

Changed paths: ompi/runtime, opal, orte/mca
@@ -9,7 +9,7 @@
  * University of Stuttgart. All rights reserved.
  * Copyright (c) 2004-2005 The Regents of the University of California.
  * All rights reserved.
- * Copyright (c) 2006-2007 Cisco Systems, Inc. All rights reserved.
+ * Copyright (c) 2006-2008 Cisco Systems, Inc. All rights reserved.
  * Copyright (c) 2006-2007 Los Alamos National Security, LLC. All rights
  * reserved.
  * Copyright (c) 2006 University of Houston. All rights reserved.
@@ -48,6 +48,7 @@
 #include "orte/mca/grpcomm/grpcomm.h"
 #include "orte/runtime/orte_globals.h"
 #include "orte/util/show_help.h"
+#include "orte/mca/ess/ess.h"
 
 #if !ORTE_DISABLE_FULL_SUPPORT
 #include "orte/mca/routed/routed.h"
@@ -245,6 +246,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
     int num_processors;
 #endif
     bool orte_setup = false;
+    bool paffinity_enabled = false;
 
     /* Setup enough to check get/set MCA params */
 
@@ -330,18 +332,48 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
         goto error;
     }
 
-    /* Setup process affinity */
-    if ( OPAL_SUCCESS == (ret = opal_paffinity_base_slot_list_set((long)ORTE_PROC_MY_NAME->vpid))) {
-        /* If we were able to set processor affinity, try setting up memory affinity */
+    /* Setup process affinity. First check to see if a slot list was
+       specified. If so, use it. If no slot list was specified,
+       that's not an error -- just fall through and try the next
+       paffinity scheme. */
+    ret = opal_paffinity_base_slot_list_set((long)ORTE_PROC_MY_NAME->vpid);
+    if (OPAL_SUCCESS == ret) {
+        paffinity_enabled = true;
+    }
+    /* If an error occurred in the slot list setup (other than "there
+       was no slot list specified"), bail. */
+    else if (OPAL_ERR_NOT_FOUND != ret) {
+        error = "opal_paffinity_base_slot_list_set() returned an error";
+        goto error;
+    }
+    /* It's an error if multiple paffinity schemes were specified */
+    if (paffinity_enabled && ompi_mpi_paffinity_alone) {
+        ret = OMPI_ERR_BAD_PARAM;
+        error = "Multiple processor affinity schemes specified (can only specify one)";
+        goto error;
+    }
+    /* Otherwise, if mpi_paffinity_alone was set, use that scheme */
+    else if (ompi_mpi_paffinity_alone) {
+        opal_paffinity_base_cpu_set_t mask;
+        OPAL_PAFFINITY_CPU_ZERO(mask);
+        OPAL_PAFFINITY_CPU_SET(orte_ess.get_node_rank(ORTE_PROC_MY_NAME),
+                               mask);
+        ret = opal_paffinity_base_set(mask);
+        if (OPAL_SUCCESS != ret) {
+            error = "Setting processor affinity failed";
+            goto error;
+        }
+        paffinity_enabled = true;
+    }
+
+    /* If we were able to set processor affinity, try setting up
+       memory affinity */
+    if (paffinity_enabled) {
         if (OPAL_SUCCESS == opal_maffinity_base_open() &&
             OPAL_SUCCESS == opal_maffinity_base_select()) {
             ompi_mpi_maffinity_setup = true;
         }
     }
-    if ( OPAL_ERROR == ret ){
-        error = "opal_paffinity_base_slot_list_set: error slot_list assigning";
-        goto error;
-    }
 
     /* initialize datatypes. This step should be done early as it will
      * create the local convertor and local arch used in the proc
@@ -47,6 +47,7 @@ int ompi_debug_show_mpi_alloc_mem_leaks = 0;
 bool ompi_debug_no_free_handles = false;
 bool ompi_mpi_show_mca_params = false;
 char *ompi_mpi_show_mca_params_file = NULL;
+bool ompi_mpi_paffinity_alone = false;
 bool ompi_mpi_abort_print_stack = false;
 int ompi_mpi_abort_delay = 0;
 bool ompi_mpi_keep_peer_hostnames = true;
@@ -228,10 +229,11 @@ int ompi_mpi_register_params(void)
                                 true);
     }
 
-    /* Paffinity alone -- make mpi_paffinity_alone a synonym for
-       opal_paffinity_alone */
-    index = mca_base_param_find("opal", NULL, "paffinity_alone");
-    mca_base_param_reg_syn_name(index, "mpi", "paffinity_alone", false);
+    mca_base_param_reg_int_name("mpi", "paffinity_alone",
+                                "If nonzero, assume that this job is the only (set of) process(es) running on each node and bind processes to processors, starting with processor ID 0",
+                                false, false,
+                                (int) ompi_mpi_paffinity_alone, &value);
+    ompi_mpi_paffinity_alone = OPAL_INT_TO_BOOL(value);
 
     /* Sparse group storage support */
@@ -11,7 +11,7 @@
  * All rights reserved.
  * Copyright (c) 2007 Los Alamos National Security, LLC. All rights
  * reserved.
- * Copyright (c) 2006-2007 Cisco Systems, Inc. All rights reserved.
+ * Copyright (c) 2006-2008 Cisco Systems, Inc. All rights reserved.
  * $COPYRIGHT$
  *
  * Additional copyrights may follow
@@ -94,8 +94,13 @@ OMPI_DECLSPEC extern bool ompi_mpi_show_mca_params;
 OMPI_DECLSPEC extern char * ompi_mpi_show_mca_params_file;
 
 /**
- * If this value is true, we can check process binding to CPU
+ * If this value is true, assume that this ORTE job is the only job
+ * running on the nodes that have been allocated to it, and bind
+ * processes to the processor ID corresponding to their node local
+ * rank (if you COMM_SPAWN on to empty processors on the same node,
+ * the NLR will start at N, not 0).
  */
 OMPI_DECLSPEC extern bool ompi_mpi_paffinity_alone;
 
 /**
  * Whether we should keep the string hostnames of all the MPI
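A concrete example of the NLR behavior documented above: if a job launches four processes on a node, they receive node local ranks 0-3 and mpi_paffinity_alone binds them to processor IDs 0-3; a job subsequently COMM_SPAWNed onto the same node starts at NLR 4 and therefore binds starting at processor ID 4, instead of colliding with processor 0 as in the v1.2 behavior.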
@@ -9,6 +9,7 @@
  * University of Stuttgart. All rights reserved.
  * Copyright (c) 2004-2005 The Regents of the University of California.
  * All rights reserved.
+ * Copyright (c) 2008 Cisco Systems, Inc. All rights reserved.
  * $COPYRIGHT$
  *
  * Additional copyrights may follow
@@ -65,6 +66,10 @@ int opal_paffinity_base_open(void)
     }
 
     opal_paffinity_base_components_opened_valid = false;
 
+    mca_base_param_reg_string_name("opal", "paffinity_base_slot_list",
+                                   "Used to set list of processor IDs to bind MPI processes to (e.g., used in conjunction with rank files)",
+                                   true, false, NULL, NULL);
+
     /* Open up all available components */
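A note on the registration above: assuming the usual mca_base_param_reg_string_name() argument order (type, name, help string, internal, read-only, default, current value), the first boolean marks opal_paffinity_base_slot_list as an internal parameter. That is consistent with the orte/odls hunk further down, which propagates a child's slot_list to the child process through the corresponding environment variable rather than expecting users to set the value directly.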
@@ -340,16 +340,20 @@ int opal_paffinity_base_slot_list_set(long rank)
     int item_cnt, socket_core_cnt, rc;
 
     rc = mca_base_param_find("opal", NULL, "paffinity_slot_list");
-    if (rc >= 0) {
-        if (OPAL_SUCCESS == mca_base_param_lookup_string(rc, &slot_str)) {
-            if (NULL == slot_str) {
-                return OPAL_ERR_BAD_PARAM;
-            }
-        }
-        if (0 == strcmp("", slot_str)){
-            return OPAL_ERR_BAD_PARAM;
-        }
-    }
+    /* If there was no slot list specified, return a specific error
+       code indicating that */
+    if (rc <= 0) {
+        return OPAL_ERR_NOT_FOUND;
+    }
+
+    if (OPAL_SUCCESS == mca_base_param_lookup_string(rc, &slot_str)) {
+        if (NULL == slot_str) {
+            return OPAL_ERR_BAD_PARAM;
+        }
+    }
+    if (0 == strcmp("", slot_str)){
+        return OPAL_ERR_BAD_PARAM;
+    }
     opal_output_verbose(5, opal_paffinity_base_output, "paffinity slot assignment: slot_list == %s", slot_str);
 
     item = opal_argv_split (slot_str, ',');
@@ -11,7 +11,7 @@
  * All rights reserved.
  * Copyright (c) 2006 Los Alamos National Security, LLC. All rights
  * reserved.
- *
+ * Copyright (c) 2008 Cisco Systems, Inc. All rights reserved.
  * $COPYRIGHT$
  *
  * Additional copyrights may follow
@@ -70,14 +70,6 @@ int opal_register_params(void)
         mca_base_param_reg_string_name("opal", "signal",
                                        "If a signal is received, display the stack trace frame",
                                        false, false, string, NULL);
-
-        mca_base_param_reg_string_name("opal","paffinity_slot_list",
-                                       "Used to set list of slots to be bind to",
-                                       false,false, NULL, NULL);
-
-        mca_base_param_reg_int_name("opal", "paffinity_alone",
-                                    "If nonzero, assume that this job is the only (set of) process(es) running on each node and bind processes to processors, starting with processor ID 0",
-                                    false, false, (int)false, 0);
         free(string);
     }
@@ -756,8 +756,8 @@ int orte_odls_base_default_launch_local(orte_jobid_t job,
     orte_app_context_t *app, **apps;
     orte_std_cntr_t num_apps;
     orte_odls_child_t *child=NULL;
-    int i, num_processors, int_value;
-    bool want_processor, oversubscribed;
+    int i, num_processors;
+    bool oversubscribed;
     int rc=ORTE_SUCCESS, ret;
     bool launch_failed=true;
     opal_buffer_t alert;
@@ -811,55 +811,21 @@ int orte_odls_base_default_launch_local(orte_jobid_t job,
         }
     }
 
-    /* setup for processor affinity. If there are enough physical processors on this node, then
-     * we indicate which processor each process should be assigned to, IFF the user has requested
-     * processor affinity be used - the paffinity subsystem will make that final determination. All
-     * we do here is indicate that we should do the definitions just in case paffinity is active
-     */
-    if (OPAL_SUCCESS != opal_get_num_processors(&num_processors)) {
-        /* if we cannot find the number of local processors, then default to conservative
-         * settings
-         */
-        want_processor = false; /* default to not being a hog */
-        oversubscribed = true;
-
-        OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
-                             "%s odls:launch could not get number of processors - using conservative settings",
-                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
-
-    } else {
-
-        OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
-                             "%s odls:launch got %ld processors",
-                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (long)num_processors));
-
-        /* grab a processor if we can */
-        if (opal_list_get_size(&orte_odls_globals.children) > (size_t)num_processors) {
-            want_processor = false;
-        } else {
-            want_processor = true;
-        }
-
-        if (opal_list_get_size(&orte_odls_globals.children) > (size_t)num_processors) {
-            /* if the #procs > #processors, declare us oversubscribed regardless
-             * of what the mapper claimed - the user may have told us something
-             * incorrect
-             */
-            oversubscribed = true;
-        } else {
-            /* likewise, if there are more processors here than we were told,
-             * declare us to not be oversubscribed so we can be aggressive. This
-             * covers the case where the user didn't tell us anything about the
-             * number of available slots, so we defaulted to a value of 1
-             */
-            oversubscribed = false;
-        }
-    }
+    if (opal_list_get_size(&orte_odls_globals.children) > (size_t)num_processors) {
+        /* if the #procs > #processors, declare us oversubscribed. This
+         * covers the case where the user didn't tell us anything about the
+         * number of available slots, so we defaulted to a value of 1
+         */
+        oversubscribed = true;
+    } else {
+        /* otherwise, declare us to not be oversubscribed so we can be aggressive */
+        oversubscribed = false;
+    }
 
     OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
-                         "%s odls:launch oversubscribed set to %s want_processor set to %s",
+                         "%s odls:launch oversubscribed set to %s",
                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
-                         oversubscribed ? "true" : "false", want_processor ? "true" : "false"));
+                         oversubscribed ? "true" : "false"));
 
     /* setup to report the proc state to the HNP */
     OBJ_CONSTRUCT(&alert, opal_buffer_t);
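Why want_processor could be dropped (per the cleanup bullet in the commit message): in the removed code above, every path set want_processor to the opposite of oversubscribed -- the conservative fallback used false/true, and the two #children vs. #processors tests assigned both flags from the same comparison -- so a single oversubscribed flag carries the same information.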
@@ -1000,30 +966,12 @@ int orte_odls_base_default_launch_local(orte_jobid_t job,
             opal_setenv("OMPI_COMM_WORLD_LOCAL_RANK", value, true, &app->env);
             free(value);
 
-            { /* unset paffinity_slot_list environment */
-                param = mca_base_param_environ_variable("opal", NULL, "paffinity_slot_list");
-                opal_unsetenv(param, &app->env);
-                free(param);
-            }
             if ( NULL != child->slot_list ) {
                 param = mca_base_param_environ_variable("opal", NULL, "paffinity_slot_list");
                 asprintf(&value, "%s", child->slot_list);
                 opal_setenv(param, value, true, &app->env);
                 free(param);
                 free(value);
-            } else if (want_processor) { /* setting paffinity_alone */
-                int parameter = mca_base_param_find("opal", NULL, "paffinity_alone");
-                if ( parameter >=0 ) {
-                    int_value = 0;
-                    mca_base_param_lookup_int(parameter, &int_value);
-                    if ( int_value ){
-                        param = mca_base_param_environ_variable("opal", NULL, "paffinity_slot_list");
-                        asprintf(&value, "%lu", (unsigned long) proc_rank);
-                        opal_setenv(param, value, true, &app->env);
-                        free(value);
-                        free(param);
-                    }
-                }
             }
 
             /* must unlock prior to fork to keep things clean in the
@@ -74,7 +74,6 @@ orte_rmaps_rank_file_component_t mca_rmaps_rank_file_component = {
  */
 static int orte_rmaps_rank_file_open(void)
 {
-    int index, paffinity_alone = 0;
     mca_rmaps_rank_file_component.priority = 0;
 
     mca_base_param_reg_string(&mca_rmaps_rank_file_component.super.base_version,
@@ -85,24 +84,6 @@ static int orte_rmaps_rank_file_open(void)
         mca_rmaps_rank_file_component.priority = 100;
     }
 
-    index = mca_base_param_find("opal", NULL, "paffinity_slot_list");
-    if (index >= 0) {
-        if (OPAL_SUCCESS == mca_base_param_lookup_string(index, &orte_mca_rmaps_rank_file_slot_list)) {
-            if (NULL != orte_mca_rmaps_rank_file_slot_list) {
-                mca_rmaps_rank_file_component.priority = 100;
-            }
-        }
-    }
-
-    index = mca_base_param_find("opal", NULL, "paffinity_alone");
-    if (index >= 0) {
-        if (OPAL_SUCCESS == mca_base_param_lookup_int(index, &paffinity_alone)) {
-            if ( 100 == mca_rmaps_rank_file_component.priority && paffinity_alone ){
-                opal_output(0, "WARNING: paffinity_alone cannot be set with paffinity_slot_list or rank_file\nTherefore mca_rmaps_rank_file_component.priority set to 0\n");
-                mca_rmaps_rank_file_component.priority = 0;
-            }
-        }
-    }
     return ORTE_SUCCESS;
 }