openmpi/opal/mca/maffinity/hwloc/maffinity_hwloc_module.c

/*
 * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana
 *                         University Research and Technology
 *                         Corporation. All rights reserved.
 * Copyright (c) 2004-2005 The University of Tennessee and The University
 *                         of Tennessee Research Foundation. All rights
 *                         reserved.
 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
 *                         University of Stuttgart. All rights reserved.
 * Copyright (c) 2004-2005 The Regents of the University of California.
 *                         All rights reserved.
 * Copyright (c) 2006-2011 Cisco Systems, Inc. All rights reserved.
 *
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 */
#include "opal_config.h"
/* This component will only be compiled on Hwloc, where we are
guaranteed to have <unistd.h> and friends */
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include "opal/constants.h"
#include "opal/mca/base/mca_base_param.h"
#include "opal/mca/maffinity/maffinity.h"
#include "opal/mca/maffinity/base/base.h"
#include "maffinity_hwloc.h"
#include "opal/mca/hwloc/hwloc.h"
/*
 * Local functions
 */
static int hwloc_module_init(void);
static int hwloc_module_set(opal_maffinity_base_segment_t *segments,
                            size_t num_segments);
static int hwloc_module_node_name_to_id(char *, int *);
static int hwloc_module_bind(opal_maffinity_base_segment_t *, size_t, int);

/*
 * Hwloc maffinity module
 */
static const opal_maffinity_base_module_1_0_0_t local_module = {
    /* Initialization function */
    hwloc_module_init,

    /* Module function pointers */
    hwloc_module_set,
    hwloc_module_node_name_to_id,
    hwloc_module_bind
};
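
/* Query function: this module is only usable if the hwloc topology
   has already been set up; in that case the component's priority is
   reported back to the maffinity framework. */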
int opal_maffinity_hwloc_component_query(mca_base_module_t **module,
                                         int *priority)
{
    if (NULL == opal_hwloc_topology) {
        return OPAL_ERROR;
    }

    *priority = mca_maffinity_hwloc_component.priority;
    *module = (mca_base_module_t *) &local_module;

    return OPAL_SUCCESS;
}

static int hwloc_module_init(void)
{
    /* Nothing to do! */
    return OPAL_SUCCESS;
}
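
/* "Set" function: bind each given memory segment to wherever this
   process is currently processor-bound.  If a binding attempt fails,
   the failure is reported through
   opal_maffinity_base_report_bind_failure(), which warns or aborts
   according to the maffinity_base_bind_failure_action MCA parameter. */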
static int hwloc_module_set(opal_maffinity_base_segment_t *segments,
                            size_t num_segments)
{
    int rc = OPAL_SUCCESS;
    char *msg = NULL;
    size_t i;
    hwloc_cpuset_t cpuset = NULL;

    /* This module won't be used unless the process is already
       processor-bound.  So find out where we're processor bound, and
       bind our memory there, too. */
    cpuset = hwloc_bitmap_alloc();
    if (NULL == cpuset) {
        rc = OPAL_ERR_OUT_OF_RESOURCE;
        msg = "hwloc_bitmap_alloc() failure";
        goto out;
    }
    hwloc_get_cpubind(opal_hwloc_topology, cpuset, 0);
    for (i = 0; i < num_segments; ++i) {
        if (0 != hwloc_set_area_membind(opal_hwloc_topology,
                                        segments[i].mbs_start_addr,
                                        segments[i].mbs_len, cpuset,
                                        HWLOC_MEMBIND_BIND,
                                        HWLOC_MEMBIND_STRICT)) {
            rc = OPAL_ERROR;
            msg = "hwloc_set_area_membind() failure";
            goto out;
        }
    }

 out:
    if (NULL != cpuset) {
        hwloc_bitmap_free(cpuset);
    }
    if (OPAL_SUCCESS != rc) {
        return opal_maffinity_base_report_bind_failure(__FILE__, __LINE__,
                                                       msg, rc);
    }
    return OPAL_SUCCESS;
}
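
/* Convert a NUMA node name to its numeric ID.  This simply skips a
   3-character prefix and parses the remainder as an integer
   (presumably a name of the form "memN"); hence the "fix me" note
   below. */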
static int hwloc_module_node_name_to_id(char *node_name, int *id)
{
    /* GLB: fix me */
    *id = atoi(node_name + 3);
    return OPAL_SUCCESS;
}
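
/* "Bind" function: bind each given memory segment to the specified
   node ID.  The ID is marked in a freshly allocated bitmap and passed
   to hwloc_set_area_membind(); failures are reported the same way as
   in hwloc_module_set(). */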
static int hwloc_module_bind(opal_maffinity_base_segment_t *segs,
                             size_t count, int node_id)
{
    size_t i;
    int rc = OPAL_SUCCESS;
    char *msg = NULL;
    hwloc_cpuset_t cpuset = NULL;

    cpuset = hwloc_bitmap_alloc();
    if (NULL == cpuset) {
        rc = OPAL_ERR_OUT_OF_RESOURCE;
        msg = "hwloc_bitmap_alloc() failure";
        goto out;
    }
    hwloc_bitmap_set(cpuset, node_id);
    for (i = 0; i < count; ++i) {
        if (0 != hwloc_set_area_membind(opal_hwloc_topology,
                                        segs[i].mbs_start_addr,
                                        segs[i].mbs_len, cpuset,
                                        HWLOC_MEMBIND_BIND,
                                        HWLOC_MEMBIND_STRICT)) {
            rc = OPAL_ERROR;
            msg = "hwloc_set_area_membind() failure";
            goto out;
        }
    }

 out:
    if (NULL != cpuset) {
        hwloc_bitmap_free(cpuset);
    }
    if (OPAL_SUCCESS != rc) {
        return opal_maffinity_base_report_bind_failure(__FILE__, __LINE__,
                                                       msg, rc);
    }
    return OPAL_SUCCESS;
}