Short version: remove opal_paffinity_alone and restore
mpi_paffinity_alone.  ORTE makes various information available to the
MPI layer, which decides what it wants to do in terms of processor
affinity.

Details:

 * remove opal_paffinity_alone MCA param; restore mpi_paffinity_alone
   MCA param
 * move opal_paffinity_slot_list param registration to paffinity base
 * ompi_mpi_init() calls opal_paffinity_base_slot_list_set(); if that
   succeeds, use that.  If no slot list was set, see if
   mpi_paffinity_alone was set.  If so, bind this process to its Node
   Local Rank (NLR).  The NLR is the ORTE-maintained slot ID; if you
   COMM_SPAWN to a host in this ORTE universe that already has procs
   on it, the NLR for the new job will start at N (not 0).  So this is
   slightly better than mpi_paffinity_alone in the v1.2 series.  (A
   condensed sketch of this logic follows this list.)
 * If a slot list is specified *and* mpi_paffinity_alone is set, we
   display an error and abort.
 * Remove calls from the rmaps/rank_file component that register and
   look up opal_paffinity MCA params.
 * Remove code in orte/odls that set affinities; instead, have it just
   pass a slot_list if one exists.
 * Clean up the orte/odls code that determined
   oversubscribed/want_processor, since these were just opposites of
   each other.

This commit was SVN r18874.

The following Trac tickets were found above:
  Ticket 1383 --> https://svn.open-mpi.org/trac/ompi/ticket/1383
This commit is contained in:
Jeff Squyres 2008-07-10 21:12:45 +00:00
parent 773c92a6eb
commit 583bf425c0
8 changed files with 82 additions and 113 deletions

@@ -9,7 +9,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006-2007 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2006-2008 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2006-2007 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2006 University of Houston. All rights reserved.
@@ -48,6 +48,7 @@
#include "orte/mca/grpcomm/grpcomm.h"
#include "orte/runtime/orte_globals.h"
#include "orte/util/show_help.h"
#include "orte/mca/ess/ess.h"
#if !ORTE_DISABLE_FULL_SUPPORT
#include "orte/mca/routed/routed.h"
@@ -245,6 +246,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
int num_processors;
#endif
bool orte_setup = false;
bool paffinity_enabled = false;
/* Setup enough to check get/set MCA params */
@@ -330,18 +332,48 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
goto error;
}
/* Setup process affinity */
if ( OPAL_SUCCESS == (ret = opal_paffinity_base_slot_list_set((long)ORTE_PROC_MY_NAME->vpid))) {
/* If we were able to set processor affinity, try setting up memory affinity */
/* Setup process affinity. First check to see if a slot list was
specified. If so, use it. If no slot list was specified,
that's not an error -- just fall through and try the next
paffinity scheme. */
ret = opal_paffinity_base_slot_list_set((long)ORTE_PROC_MY_NAME->vpid);
if (OPAL_SUCCESS == ret) {
paffinity_enabled = true;
}
/* If an error occurred in the slot list setup (other than "there
was no slot list specified"), bail. */
else if (OPAL_ERR_NOT_FOUND != ret) {
error = "opal_paffinity_base_slot_list_set() returned an error";
goto error;
}
/* It's an error if multiple paffinity schemes were specified */
if (paffinity_enabled && ompi_mpi_paffinity_alone) {
ret = OMPI_ERR_BAD_PARAM;
error = "Multiple processor affinity schemes specified (can only specify one)";
goto error;
}
/* Otherwise, if mpi_paffinity_alone was set, use that scheme */
else if (ompi_mpi_paffinity_alone) {
opal_paffinity_base_cpu_set_t mask;
OPAL_PAFFINITY_CPU_ZERO(mask);
OPAL_PAFFINITY_CPU_SET(orte_ess.get_node_rank(ORTE_PROC_MY_NAME),
mask);
ret = opal_paffinity_base_set(mask);
if (OPAL_SUCCESS != ret) {
error = "Setting processor affinity failed";
goto error;
}
paffinity_enabled = true;
}
/* If we were able to set processor affinity, try setting up
memory affinity */
if (paffinity_enabled) {
if (OPAL_SUCCESS == opal_maffinity_base_open() &&
OPAL_SUCCESS == opal_maffinity_base_select()) {
ompi_mpi_maffinity_setup = true;
}
}
if ( OPAL_ERROR == ret ){
error = "opal_paffinity_base_slot_list_set: error slot_list assigning";
goto error;
}
/* initialize datatypes. This step should be done early as it will
* create the local convertor and local arch used in the proc

@@ -47,6 +47,7 @@ int ompi_debug_show_mpi_alloc_mem_leaks = 0;
bool ompi_debug_no_free_handles = false;
bool ompi_mpi_show_mca_params = false;
char *ompi_mpi_show_mca_params_file = NULL;
bool ompi_mpi_paffinity_alone = false;
bool ompi_mpi_abort_print_stack = false;
int ompi_mpi_abort_delay = 0;
bool ompi_mpi_keep_peer_hostnames = true;
@@ -228,10 +229,11 @@ int ompi_mpi_register_params(void)
true);
}
/* Paffinity alone -- make mpi_paffinity_alone a synonym for
opal_paffinity_alone */
index = mca_base_param_find("opal", NULL, "paffinity_alone");
mca_base_param_reg_syn_name(index, "mpi", "paffinity_alone", false);
mca_base_param_reg_int_name("mpi", "paffinity_alone",
"If nonzero, assume that this job is the only (set of) process(es) running on each node and bind processes to processors, starting with processor ID 0",
false, false,
(int) ompi_mpi_paffinity_alone, &value);
ompi_mpi_paffinity_alone = OPAL_INT_TO_BOOL(value);
/* Sparse group storage support */

@@ -11,7 +11,7 @@
* All rights reserved.
* Copyright (c) 2007 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2006-2007 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2006-2008 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@@ -94,8 +94,13 @@ OMPI_DECLSPEC extern bool ompi_mpi_show_mca_params;
OMPI_DECLSPEC extern char * ompi_mpi_show_mca_params_file;
/**
* If this value is true, we can check process binding to CPU
* If this value is true, assume that this ORTE job is the only job
* running on the nodes that have been allocated to it, and bind
* processes to the processor ID corresponding to their node local
* rank (if you COMM_SPAWN on to empty processors on the same node,
* the NLR will start at N, not 0).
*/
OMPI_DECLSPEC extern bool ompi_mpi_paffinity_alone;
/**
* Whether we should keep the string hostnames of all the MPI

@@ -9,6 +9,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2008 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@@ -65,6 +66,10 @@ int opal_paffinity_base_open(void)
}
opal_paffinity_base_components_opened_valid = false;
mca_base_param_reg_string_name("opal", "paffinity_base_slot_list",
"Used to set list of processor IDs to bind MPI processes to (e.g., used in conjunction with rank files)",
true, false, NULL, NULL);
/* Open up all available components */

@@ -340,16 +340,20 @@ int opal_paffinity_base_slot_list_set(long rank)
int item_cnt, socket_core_cnt, rc;
rc = mca_base_param_find("opal", NULL, "paffinity_slot_list");
/* If there was no slot list specified, return a specific error
code indicating that */
if (rc <= 0) {
return OPAL_ERR_NOT_FOUND;
}
if (OPAL_SUCCESS == mca_base_param_lookup_string(rc, &slot_str)) {
if (NULL == slot_str) {
return OPAL_ERR_BAD_PARAM;
}
}
if (0 == strcmp("", slot_str)){
return OPAL_ERR_BAD_PARAM;
}
opal_output_verbose(5, opal_paffinity_base_output, "paffinity slot assignment: slot_list == %s", slot_str);
item = opal_argv_split (slot_str, ',');

@@ -11,7 +11,7 @@
* All rights reserved.
* Copyright (c) 2006 Los Alamos National Security, LLC. All rights
* reserved.
*
* Copyright (c) 2008 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@@ -70,14 +70,6 @@ int opal_register_params(void)
mca_base_param_reg_string_name("opal", "signal",
"If a signal is received, display the stack trace frame",
false, false, string, NULL);
mca_base_param_reg_string_name("opal","paffinity_slot_list",
"Used to set list of slots to be bind to",
false,false, NULL, NULL);
mca_base_param_reg_int_name("opal", "paffinity_alone",
"If nonzero, assume that this job is the only (set of) process(es) running on each node and bind processes to processors, starting with processor ID 0",
false, false, (int)false, 0);
free(string);
}

@@ -756,8 +756,8 @@ int orte_odls_base_default_launch_local(orte_jobid_t job,
orte_app_context_t *app, **apps;
orte_std_cntr_t num_apps;
orte_odls_child_t *child=NULL;
int i, num_processors, int_value;
bool want_processor, oversubscribed;
int i, num_processors;
bool oversubscribed;
int rc=ORTE_SUCCESS, ret;
bool launch_failed=true;
opal_buffer_t alert;
@@ -811,55 +811,21 @@ int orte_odls_base_default_launch_local(orte_jobid_t job,
}
}
/* setup for processor affinity. If there are enough physical processors on this node, then
* we indicate which processor each process should be assigned to, IFF the user has requested
* processor affinity be used - the paffinity subsystem will make that final determination. All
* we do here is indicate that we should do the definitions just in case paffinity is active
*/
if (OPAL_SUCCESS != opal_get_num_processors(&num_processors)) {
/* if we cannot find the number of local processors, then default to conservative
* settings */
if (opal_list_get_size(&orte_odls_globals.children) > (size_t)num_processors) {
/* if the #procs > #processors, declare us oversubscribed. This
* covers the case where the user didn't tell us anything about the
* number of available slots, so we defaulted to a value of 1
*/
want_processor = false; /* default to not being a hog */
oversubscribed = true;
OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
"%s odls:launch could not get number of processors - using conservative settings",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
} else {
OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
"%s odls:launch got %ld processors",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (long)num_processors));
/* grab a processor if we can */
if (opal_list_get_size(&orte_odls_globals.children) > (size_t)num_processors) {
want_processor = false;
} else {
want_processor = true;
}
if (opal_list_get_size(&orte_odls_globals.children) > (size_t)num_processors) {
/* if the #procs > #processors, declare us oversubscribed regardless
* of what the mapper claimed - the user may have told us something
* incorrect
*/
oversubscribed = true;
} else {
/* likewise, if there are more processors here than we were told,
* declare us to not be oversubscribed so we can be aggressive. This
* covers the case where the user didn't tell us anything about the
* number of available slots, so we defaulted to a value of 1
*/
oversubscribed = false;
}
/* otherwise, declare us to not be oversubscribed so we can be aggressive */
oversubscribed = false;
}
OPAL_OUTPUT_VERBOSE((5, orte_odls_globals.output,
"%s odls:launch oversubscribed set to %s want_processor set to %s",
"%s odls:launch oversubscribed set to %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
oversubscribed ? "true" : "false", want_processor ? "true" : "false"));
oversubscribed ? "true" : "false"));
/* setup to report the proc state to the HNP */
OBJ_CONSTRUCT(&alert, opal_buffer_t);
@@ -1000,30 +966,12 @@ int orte_odls_base_default_launch_local(orte_jobid_t job,
opal_setenv("OMPI_COMM_WORLD_LOCAL_RANK", value, true, &app->env);
free(value);
{ /* unset paffinity_slot_list environment */
param = mca_base_param_environ_variable("opal", NULL, "paffinity_slot_list");
opal_unsetenv(param, &app->env);
free(param);
}
if ( NULL != child->slot_list ) {
param = mca_base_param_environ_variable("opal", NULL, "paffinity_slot_list");
asprintf(&value, "%s", child->slot_list);
opal_setenv(param, value, true, &app->env);
free(param);
free(value);
} else if (want_processor) { /* setting paffinity_alone */
int parameter = mca_base_param_find("opal", NULL, "paffinity_alone");
if ( parameter >=0 ) {
int_value = 0;
mca_base_param_lookup_int(parameter, &int_value);
if ( int_value ){
param = mca_base_param_environ_variable("opal", NULL, "paffinity_slot_list");
asprintf(&value, "%lu", (unsigned long) proc_rank);
opal_setenv(param, value, true, &app->env);
free(value);
free(param);
}
}
}
/* must unlock prior to fork to keep things clean in the

@@ -74,7 +74,6 @@ orte_rmaps_rank_file_component_t mca_rmaps_rank_file_component = {
*/
static int orte_rmaps_rank_file_open(void)
{
int index, paffinity_alone = 0;
mca_rmaps_rank_file_component.priority = 0;
mca_base_param_reg_string(&mca_rmaps_rank_file_component.super.base_version,
@@ -85,24 +84,6 @@ static int orte_rmaps_rank_file_open(void)
mca_rmaps_rank_file_component.priority = 100;
}
index = mca_base_param_find("opal", NULL, "paffinity_slot_list");
if (index >= 0) {
if (OPAL_SUCCESS == mca_base_param_lookup_string(index, &orte_mca_rmaps_rank_file_slot_list)) {
if (NULL != orte_mca_rmaps_rank_file_slot_list) {
mca_rmaps_rank_file_component.priority = 100;
}
}
}
index = mca_base_param_find("opal", NULL, "paffinity_alone");
if (index >= 0) {
if (OPAL_SUCCESS == mca_base_param_lookup_int(index, &paffinity_alone)) {
if ( 100 == mca_rmaps_rank_file_component.priority && paffinity_alone ){
opal_output(0, "WARNING: paffinity_alone cannot be set with paffinity_slot_list or rank_file\nTherefore mca_rmaps_rank_file_component.priority set to 0\n");
mca_rmaps_rank_file_component.priority = 0;
}
}
}
return ORTE_SUCCESS;
}