1
1

Somehow this has been sitting, uncommitted, in a local checkout since
last December.  :-(

Add new MCA param: maffinity_libnuma_policy.  Thanks to David
Singleton for the suggestion.  Here's the help text about it:

{{{
   MCA maffinity: parameter "maffinity_libnuma_policy" (current value:
                  <loose>, data source: default value)
                  Binding policy that determines what happens if memory
                  is unavailable on the local NUMA node.  A value of
                  "strict" means that the memory allocation will fail;
                  a value of "loose" means that the memory allocation
                  will spill over to another NUMA node.
}}}

This commit was SVN r24290.
Этот коммит содержится в:
Jeff Squyres 2011-01-24 14:39:16 +00:00
родитель 272fe89252
Коммит afa654746c
5 изменённых файлов: 117 добавлений и 39 удалений

Просмотреть файл

@ -9,7 +9,7 @@
# University of Stuttgart. All rights reserved. # University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California. # Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved. # All rights reserved.
# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2010-2011 Cisco Systems, Inc. All rights reserved.
# $COPYRIGHT$ # $COPYRIGHT$
# #
# Additional copyrights may follow # Additional copyrights may follow
@ -19,6 +19,8 @@
AM_CPPFLAGS = $(maffinity_libnuma_CPPFLAGS) AM_CPPFLAGS = $(maffinity_libnuma_CPPFLAGS)
dist_pkgdata_DATA = help-opal-maffinity-libnuma.txt
sources = \ sources = \
maffinity_libnuma.h \ maffinity_libnuma.h \
maffinity_libnuma_component.c \ maffinity_libnuma_component.c \

Просмотреть файл

@ -0,0 +1,27 @@
# -*- text -*-
#
# Copyright (c) 2011 Cisco Systems, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# This is the US/English help file for Open MPI's libnuma support
#
[invalid policy]
WARNING: An invalid value was given for the maffinity_libnuma_policy
MCA parameter. The policy determines what happens when Open MPI tries
to allocate local memory, but no local memory is available. The value
provided was:
Value: %s
PID: %d
Valid values are:
strict: the memory allocation will fail
loose: the memory allocation will spill over to remote memory
Your job will now abort.
#

Просмотреть файл

@ -9,6 +9,7 @@
* University of Stuttgart. All rights reserved. * University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California. * Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved. * All rights reserved.
* Copyright (c) 2011 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
* Additional copyrights may follow * Additional copyrights may follow
@ -34,10 +35,20 @@
BEGIN_C_DECLS BEGIN_C_DECLS
/* Component struct to wrap the base maffinity component and then include
some data private to this component. */
typedef struct {
/* Base component */
opal_maffinity_base_component_2_0_0_t base;
/* What libnuma memory binding policy we're using */
int libnuma_policy;
} opal_maffinity_libnuma_component_2_0_0_t;
/** /**
* Globally exported variable * Globally exported variable
*/ */
OPAL_DECLSPEC extern const opal_maffinity_base_component_2_0_0_t OPAL_DECLSPEC extern opal_maffinity_libnuma_component_2_0_0_t
mca_maffinity_libnuma_component; mca_maffinity_libnuma_component;

Просмотреть файл

@ -9,6 +9,7 @@
* University of Stuttgart. All rights reserved. * University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California. * Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved. * All rights reserved.
* Copyright (c) 2011 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
* Additional copyrights may follow * Additional copyrights may follow
@ -18,8 +19,14 @@
#include "opal_config.h" #include "opal_config.h"
#include <numa.h>
#include <numaif.h>
#include <unistd.h>
#include "opal/constants.h" #include "opal/constants.h"
#include "opal/util/show_help.h"
#include "opal/mca/maffinity/maffinity.h" #include "opal/mca/maffinity/maffinity.h"
#include "maffinity_libnuma.h" #include "maffinity_libnuma.h"
/* /*
@ -29,47 +36,77 @@ const char *opal_maffinity_libnuma_component_version_string =
"OPAL libnuma maffinity MCA component version " OPAL_VERSION; "OPAL libnuma maffinity MCA component version " OPAL_VERSION;
/* /*
* Local function * Local functions
*/ */
static int libnuma_open(void); static int libnuma_register(void);
/*
* Local variable
*/
static char *mca_policy = NULL;
/* /*
* Instantiate the public struct with all of our public information * Instantiate the public struct with all of our public information
* and pointers to our public functions in it * and pointers to our public functions in it
*/ */
const opal_maffinity_base_component_2_0_0_t mca_maffinity_libnuma_component = { opal_maffinity_libnuma_component_2_0_0_t mca_maffinity_libnuma_component = {
/* First, the mca_component_t struct containing meta information
about the component itself */
{ {
OPAL_MAFFINITY_BASE_VERSION_2_0_0, /* First, the mca_component_t struct containing meta information
about the component itself */
/* Component name and version */ {
"libnuma", OPAL_MAFFINITY_BASE_VERSION_2_0_0,
OPAL_MAJOR_VERSION,
OPAL_MINOR_VERSION,
OPAL_RELEASE_VERSION,
/* Component open and close functions */ /* Component name and version */
libnuma_open, "libnuma",
NULL, OPAL_MAJOR_VERSION,
opal_maffinity_libnuma_component_query OPAL_MINOR_VERSION,
OPAL_RELEASE_VERSION,
/* Component open and close functions */
NULL,
NULL,
opal_maffinity_libnuma_component_query,
libnuma_register,
},
{
/* The component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
}
}, },
{
/* The component is checkpoint ready */ /* Default libnuma memory binding policy */
MCA_BASE_METADATA_PARAM_CHECKPOINT MPOL_PREFERRED,
}
}; };
static int libnuma_open(void) static int libnuma_register(void)
{ {
mca_base_param_reg_int(&mca_maffinity_libnuma_component.base_version, char *val;
mca_base_param_reg_int(&mca_maffinity_libnuma_component.base.base_version,
"priority", "priority",
"Priority of the libnuma maffinity component", "Priority of the libnuma maffinity component",
false, false, 25, NULL); false, false, 25, NULL);
val = (MPOL_PREFERRED == mca_maffinity_libnuma_component.libnuma_policy ?
"loose" : "strict");
mca_base_param_reg_string(&mca_maffinity_libnuma_component.base.base_version,
"policy",
"Binding policy that determines what happens if memory is unavailable on the local NUMA node. A value of \"strict\" means that the memory allocation will fail; a value of \"loose\" means that the memory allocation will spill over to another NUMA node.",
false, false, val, &mca_policy);
if (strcasecmp(mca_policy, "loose") == 0) {
mca_maffinity_libnuma_component.libnuma_policy = MPOL_PREFERRED;
} else if (strcasecmp(mca_policy, "strict") == 0) {
mca_maffinity_libnuma_component.libnuma_policy = MPOL_BIND;
} else {
opal_show_help("help-opal-maffinity-libnuma.txt", "invalid policy",
true, mca_policy, getpid());
mca_maffinity_libnuma_component.libnuma_policy = MPOL_PREFERRED;
return OPAL_ERR_BAD_PARAM;
}
return OPAL_SUCCESS; return OPAL_SUCCESS;
} }

Просмотреть файл

@ -9,7 +9,7 @@
* University of Stuttgart. All rights reserved. * University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California. * Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved. * All rights reserved.
* Copyright (c) 2008 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2008-2011 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$ * $COPYRIGHT$
* *
* Additional copyrights may follow * Additional copyrights may follow
@ -69,13 +69,17 @@ int opal_maffinity_libnuma_component_query(mca_base_module_t **module, int *prio
static int libnuma_module_init(void) static int libnuma_module_init(void)
{ {
/* Tell libnuma that we want all memory affinity to be local (but /* Set the libnuma policy. This affects all memory allocation,
it's not an error if we can't -- prefer running in degraded not just libnuma memory allocation. */
mode to not running at all!). */
numa_set_strict(0);
numa_set_localalloc(); numa_set_localalloc();
/* Set strict or not, depending on the value of the MCA param */
if (MPOL_BIND == mca_maffinity_libnuma_component.libnuma_policy) {
numa_set_strict(1);
} else {
numa_set_strict(0);
}
return OPAL_SUCCESS; return OPAL_SUCCESS;
} }
@ -85,12 +89,8 @@ static int libnuma_module_set(opal_maffinity_base_segment_t *segments,
{ {
size_t i; size_t i;
/* Kinda crummy that we have to allocate each portion individually /* Explicitly set the memory binding policy for a set of
rather than provide a top-level function call that does it all, segments */
but the libnuma() interface doesn't seem to allow that
flexability -- they allow "interleaving", but not fine grained
placement of pages. */
for (i = 0; i < num_segments; ++i) { for (i = 0; i < num_segments; ++i) {
numa_setlocal_memory(segments[i].mbs_start_addr, numa_setlocal_memory(segments[i].mbs_start_addr,
segments[i].mbs_len); segments[i].mbs_len);
@ -115,7 +115,8 @@ static int libnuma_modules_bind(opal_maffinity_base_segment_t *segs,
unsigned long node_mask = (1 << node_id); unsigned long node_mask = (1 << node_id);
for(i = 0; i < count; i++) { for(i = 0; i < count; i++) {
rc = mbind(segs[i].mbs_start_addr, segs[i].mbs_len, MPOL_PREFERRED, rc = mbind(segs[i].mbs_start_addr, segs[i].mbs_len,
mca_maffinity_libnuma_component.libnuma_policy,
&node_mask, sizeof(node_mask) * 8, &node_mask, sizeof(node_mask) * 8,
#ifdef HAVE_MPOL_MF_MOVE #ifdef HAVE_MPOL_MF_MOVE
MPOL_MF_MOVE MPOL_MF_MOVE