081669b440
cmr=v1.7.5:reviewer=rhc:subject=fix pretty-print of bindings This commit was SVN r30968.
421 строка
15 KiB
C
421 строка
15 KiB
C
/*
|
|
* Copyright (c) 2004-2009 The Trustees of Indiana University and Indiana
|
|
* University Research and Technology
|
|
* Corporation. All rights reserved.
|
|
* Copyright (c) 2010-2012 Cisco Systems, Inc. All rights reserved.
|
|
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
|
|
* Copyright (c) 2012 Los Alamos National Security, LLC. All rights
|
|
* reserved.
|
|
* $COPYRIGHT$
|
|
*
|
|
* Additional copyrights may follow
|
|
*
|
|
* $HEADER$
|
|
*
|
|
* Simple routine to expose three things to the MPI process:
|
|
*
|
|
* 1. What processor(s) Open MPI bound this process to
|
|
* 2. What processor(s) this process is bound to
|
|
* 3. What processor(s) exist on this host
|
|
*
|
|
* Note that 1 and 2 may be different!
|
|
*/
|
|
|
|
#include "ompi_config.h"
|
|
|
|
#include <stdio.h>
|
|
#include <string.h>
|
|
|
|
#include "opal/mca/hwloc/base/base.h"
|
|
#include "opal/runtime/opal.h"
|
|
|
|
#include "ompi/communicator/communicator.h"
|
|
#include "ompi/errhandler/errhandler.h"
|
|
#include "ompi/mpi/c/bindings.h"
|
|
#include "ompi/mpiext/affinity/c/mpiext_affinity_c.h"
|
|
|
|
static const char FUNC_NAME[] = "OMPI_Affinity";
|
|
static const char ompi_nobind_str[] = "Open MPI did not bind this process";
|
|
static const char not_bound_str[] = "Not bound (i.e., bound to all processors)";
|
|
|
|
static int get_rsrc_ompi_bound(char str[OMPI_AFFINITY_STRING_MAX]);
|
|
static int get_rsrc_current_binding(char str[OMPI_AFFINITY_STRING_MAX]);
|
|
static int get_rsrc_exists(char str[OMPI_AFFINITY_STRING_MAX]);
|
|
static int get_layout_ompi_bound(char str[OMPI_AFFINITY_STRING_MAX]);
|
|
static int get_layout_current_binding(char str[OMPI_AFFINITY_STRING_MAX]);
|
|
static int get_layout_exists(char str[OMPI_AFFINITY_STRING_MAX]);
|
|
|
|
/*---------------------------------------------------------------------------*/
|
|
|
|
int OMPI_Affinity_str(ompi_affinity_fmt_t fmt_type,
|
|
char ompi_bound[OMPI_AFFINITY_STRING_MAX],
|
|
char current_binding[OMPI_AFFINITY_STRING_MAX],
|
|
char exists[OMPI_AFFINITY_STRING_MAX])
|
|
{
|
|
int ret;
|
|
|
|
memset(ompi_bound, 0, OMPI_AFFINITY_STRING_MAX);
|
|
memset(current_binding, 0, OMPI_AFFINITY_STRING_MAX);
|
|
|
|
/* If we have no hwloc support, return nothing */
|
|
if (NULL == opal_hwloc_topology) {
|
|
strncpy(ompi_bound, "Not supported", OMPI_AFFINITY_STRING_MAX);
|
|
strncpy(current_binding, "Not supported", OMPI_AFFINITY_STRING_MAX);
|
|
strncpy(exists, "Not supported", OMPI_AFFINITY_STRING_MAX);
|
|
return MPI_SUCCESS;
|
|
}
|
|
|
|
/* Otherwise, return useful information */
|
|
switch (fmt_type) {
|
|
case OMPI_AFFINITY_RSRC_STRING_FMT:
|
|
if (OMPI_SUCCESS != (ret = get_rsrc_ompi_bound(ompi_bound)) ||
|
|
OMPI_SUCCESS != (ret = get_rsrc_current_binding(current_binding)) ||
|
|
OMPI_SUCCESS != (ret = get_rsrc_exists(exists))) {
|
|
return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, ret, FUNC_NAME);
|
|
}
|
|
break;
|
|
case OMPI_AFFINITY_LAYOUT_FMT:
|
|
if (OMPI_SUCCESS != (ret = get_layout_ompi_bound(ompi_bound)) ||
|
|
OMPI_SUCCESS != (ret = get_layout_current_binding(current_binding)) ||
|
|
OMPI_SUCCESS != (ret = get_layout_exists(exists))) {
|
|
return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, ret, FUNC_NAME);
|
|
}
|
|
break;
|
|
default:
|
|
return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, FUNC_NAME);
|
|
}
|
|
|
|
return MPI_SUCCESS;
|
|
}
|
|
|
|
/*---------------------------------------------------------------------------*/
|
|
|
|
/*
|
|
* Where did OMPI bind this process? (prettyprint)
|
|
*/
|
|
static int get_rsrc_ompi_bound(char str[OMPI_AFFINITY_STRING_MAX])
|
|
{
|
|
int ret;
|
|
|
|
/* If OMPI did not bind, indicate that */
|
|
if (!ompi_rte_proc_is_bound) {
|
|
strncpy(str, ompi_nobind_str, OMPI_AFFINITY_STRING_MAX - 1);
|
|
return OMPI_SUCCESS;
|
|
}
|
|
|
|
if (NULL == orte_proc_applied_binding) {
|
|
ret = OPAL_ERR_NOT_BOUND;
|
|
} else {
|
|
ret = opal_hwloc_base_cset2str(str, OMPI_AFFINITY_STRING_MAX,
|
|
opal_hwloc_topology,
|
|
orte_proc_applied_binding);
|
|
}
|
|
if (OPAL_ERR_NOT_BOUND == ret) {
|
|
strncpy(str, not_bound_str, OMPI_AFFINITY_STRING_MAX - 1);
|
|
ret = OMPI_SUCCESS;
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
|
|
/*
|
|
* Where is this process currently bound? (prettyprint)
|
|
*/
|
|
static int get_rsrc_current_binding(char str[OMPI_AFFINITY_STRING_MAX])
|
|
{
|
|
int ret;
|
|
hwloc_obj_t root;
|
|
hwloc_cpuset_t boundset, rootset;
|
|
bool bound = false;
|
|
|
|
/* get our root object */
|
|
root = hwloc_get_root_obj(opal_hwloc_topology);
|
|
rootset = opal_hwloc_base_get_available_cpus(opal_hwloc_topology, root);
|
|
|
|
/* get our bindings */
|
|
boundset = hwloc_bitmap_alloc();
|
|
if (hwloc_get_cpubind(opal_hwloc_topology, boundset,
|
|
HWLOC_CPUBIND_PROCESS) < 0) {
|
|
/* we are NOT bound if get_cpubind fails, nor can we be bound
|
|
- the environment does not support it */
|
|
bound = false;
|
|
} else {
|
|
/* we are bound if the two cpusets are not equal, or if there
|
|
is only ONE PU available to us */
|
|
if (0 != hwloc_bitmap_compare(boundset, rootset) ||
|
|
opal_hwloc_base_single_cpu(rootset) ||
|
|
opal_hwloc_base_single_cpu(boundset)) {
|
|
bound = true;
|
|
}
|
|
}
|
|
|
|
/* If we are not bound, indicate that */
|
|
if (!bound) {
|
|
strncat(str, not_bound_str, OMPI_AFFINITY_STRING_MAX - 1);
|
|
ret = OMPI_SUCCESS;
|
|
}
|
|
|
|
/* If we are bound, print it out */
|
|
else {
|
|
ret = opal_hwloc_base_cset2str(str, OMPI_AFFINITY_STRING_MAX,
|
|
opal_hwloc_topology,
|
|
boundset);
|
|
if (OPAL_ERR_NOT_BOUND == ret) {
|
|
strncpy(str, not_bound_str, OMPI_AFFINITY_STRING_MAX - 1);
|
|
ret = OMPI_SUCCESS;
|
|
}
|
|
}
|
|
hwloc_bitmap_free(boundset);
|
|
|
|
return ret;
|
|
}
|
|
|
|
|
|
/*
|
|
* Prettyprint a list of all available sockets and cores. Note that
|
|
* this is *everything* -- not just the ones that are available to
|
|
* this process.
|
|
*/
|
|
static int get_rsrc_exists(char str[OMPI_AFFINITY_STRING_MAX])
|
|
{
|
|
bool first = true;
|
|
int i, num_cores, num_pus;
|
|
char tmp[BUFSIZ];
|
|
const int stmp = sizeof(tmp) - 1;
|
|
hwloc_obj_t socket, core, c2;
|
|
|
|
str[0] = '\0';
|
|
for (socket = hwloc_get_obj_by_type(opal_hwloc_topology,
|
|
HWLOC_OBJ_SOCKET, 0);
|
|
NULL != socket; socket = socket->next_cousin) {
|
|
/* If this isn't the first socket, add a delimiter */
|
|
if (!first) {
|
|
strncat(str, "; ", OMPI_AFFINITY_STRING_MAX - strlen(str));
|
|
}
|
|
first = false;
|
|
|
|
snprintf(tmp, stmp, "socket %d has ", socket->os_index);
|
|
strncat(str, tmp, OMPI_AFFINITY_STRING_MAX - strlen(str));
|
|
|
|
/* Find out how many cores are inside this socket, and get an
|
|
object pointing to the first core. Also count how many PUs
|
|
are in the first core. */
|
|
num_cores = hwloc_get_nbobjs_inside_cpuset_by_type(opal_hwloc_topology,
|
|
socket->cpuset,
|
|
HWLOC_OBJ_CORE);
|
|
core = hwloc_get_obj_inside_cpuset_by_type(opal_hwloc_topology,
|
|
socket->cpuset,
|
|
HWLOC_OBJ_CORE, 0);
|
|
if (NULL != core) {
|
|
num_pus =
|
|
hwloc_get_nbobjs_inside_cpuset_by_type(opal_hwloc_topology,
|
|
core->cpuset,
|
|
HWLOC_OBJ_PU);
|
|
|
|
/* Only 1 core */
|
|
if (1 == num_cores) {
|
|
strncat(str, "1 core with ",
|
|
OMPI_AFFINITY_STRING_MAX - strlen(str));
|
|
if (1 == num_pus) {
|
|
strncat(str, "1 hwt",
|
|
OMPI_AFFINITY_STRING_MAX - strlen(str));
|
|
} else {
|
|
snprintf(tmp, stmp, "%d hwts", num_pus);
|
|
strncat(str, tmp, OMPI_AFFINITY_STRING_MAX - strlen(str));
|
|
}
|
|
}
|
|
|
|
/* Multiple cores */
|
|
else {
|
|
bool same = true;
|
|
|
|
snprintf(tmp, stmp, "%d cores", num_cores);
|
|
strncat(str, tmp, OMPI_AFFINITY_STRING_MAX - strlen(str));
|
|
|
|
/* Do all the cores have the same number of PUs? */
|
|
for (c2 = core; NULL != c2; c2 = c2->next_cousin) {
|
|
if (hwloc_get_nbobjs_inside_cpuset_by_type(opal_hwloc_topology,
|
|
core->cpuset,
|
|
HWLOC_OBJ_PU) !=
|
|
num_pus) {
|
|
same = false;
|
|
break;
|
|
}
|
|
}
|
|
|
|
/* Yes, they all have the same number of PUs */
|
|
if (same) {
|
|
snprintf(tmp, stmp, ", each with %d hwt", num_pus);
|
|
strncat(str, tmp, OMPI_AFFINITY_STRING_MAX - strlen(str));
|
|
if (num_pus != 1) {
|
|
strncat(str, "s", OMPI_AFFINITY_STRING_MAX - strlen(str));
|
|
}
|
|
}
|
|
|
|
/* No, they have differing numbers of PUs */
|
|
else {
|
|
bool first = true;
|
|
|
|
strncat(str, "with (", OMPI_AFFINITY_STRING_MAX - strlen(str));
|
|
for (c2 = core; NULL != c2; c2 = c2->next_cousin) {
|
|
if (!first) {
|
|
strncat(str, ", ",
|
|
OMPI_AFFINITY_STRING_MAX - strlen(str));
|
|
}
|
|
first = false;
|
|
|
|
i = hwloc_get_nbobjs_inside_cpuset_by_type(opal_hwloc_topology,
|
|
core->cpuset,
|
|
HWLOC_OBJ_PU);
|
|
snprintf(tmp, stmp, "%d", i);
|
|
strncat(str, tmp, OMPI_AFFINITY_STRING_MAX - strlen(str));
|
|
}
|
|
strncat(str, ") hwts",
|
|
OMPI_AFFINITY_STRING_MAX - strlen(str));
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
return OMPI_SUCCESS;
|
|
}
|
|
|
|
/*---------------------------------------------------------------------------*/
|
|
|
|
/*
|
|
* Where did OMPI bind this process? (layout string)
|
|
*/
|
|
static int get_layout_ompi_bound(char str[OMPI_AFFINITY_STRING_MAX])
|
|
{
|
|
int ret;
|
|
|
|
/* If OMPI did not bind, indicate that */
|
|
if (!ompi_rte_proc_is_bound) {
|
|
strncpy(str, ompi_nobind_str, OMPI_AFFINITY_STRING_MAX - 1);
|
|
return OMPI_SUCCESS;
|
|
}
|
|
|
|
/* Find out what OMPI bound us to and prettyprint it */
|
|
if (NULL == orte_proc_applied_binding) {
|
|
ret = OPAL_ERR_NOT_BOUND;
|
|
} else {
|
|
ret = opal_hwloc_base_cset2mapstr(str, OMPI_AFFINITY_STRING_MAX,
|
|
opal_hwloc_topology,
|
|
orte_proc_applied_binding);
|
|
}
|
|
if (OPAL_ERR_NOT_BOUND == ret) {
|
|
strncpy(str, not_bound_str, OMPI_AFFINITY_STRING_MAX - 1);
|
|
ret = OMPI_SUCCESS;
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
/*
|
|
* Where is this process currently bound? (layout string)
|
|
*/
|
|
static int get_layout_current_binding(char str[OMPI_AFFINITY_STRING_MAX])
|
|
{
|
|
int ret;
|
|
hwloc_obj_t root;
|
|
hwloc_cpuset_t boundset, rootset;
|
|
bool bound = false;
|
|
|
|
/* get our root object */
|
|
root = hwloc_get_root_obj(opal_hwloc_topology);
|
|
rootset = opal_hwloc_base_get_available_cpus(opal_hwloc_topology, root);
|
|
|
|
/* get our bindings */
|
|
boundset = hwloc_bitmap_alloc();
|
|
if (hwloc_get_cpubind(opal_hwloc_topology, boundset,
|
|
HWLOC_CPUBIND_PROCESS) < 0) {
|
|
/* we are NOT bound if get_cpubind fails, nor can we be bound
|
|
- the environment does not support it */
|
|
bound = false;
|
|
} else {
|
|
/* we are bound if the two cpusets are not equal, or if there
|
|
is only ONE PU available to us */
|
|
if (0 != hwloc_bitmap_compare(boundset, rootset) ||
|
|
opal_hwloc_base_single_cpu(rootset) ||
|
|
opal_hwloc_base_single_cpu(boundset)) {
|
|
bound = true;
|
|
}
|
|
}
|
|
|
|
/* If we are not bound, indicate that */
|
|
if (!bound) {
|
|
strncat(str, not_bound_str, OMPI_AFFINITY_STRING_MAX - 1);
|
|
ret = OMPI_SUCCESS;
|
|
}
|
|
|
|
/* If we are bound, print it out */
|
|
else {
|
|
ret = opal_hwloc_base_cset2mapstr(str, OMPI_AFFINITY_STRING_MAX,
|
|
opal_hwloc_topology,
|
|
boundset);
|
|
if (OPAL_ERR_NOT_BOUND == ret) {
|
|
strncpy(str, not_bound_str, OMPI_AFFINITY_STRING_MAX - 1);
|
|
ret = OMPI_SUCCESS;
|
|
}
|
|
}
|
|
hwloc_bitmap_free(boundset);
|
|
|
|
return ret;
|
|
}
|
|
|
|
/*
|
|
* Make a layout string of all available sockets and cores. Note that
|
|
* this is *everything* -- not just the ones that are available to
|
|
* this process.
|
|
*
|
|
* Example: [../..]
|
|
* Key: [] - signifies socket
|
|
* / - signifies core
|
|
* . - signifies PU
|
|
*/
|
|
static int get_layout_exists(char str[OMPI_AFFINITY_STRING_MAX])
|
|
{
|
|
int core_index, pu_index;
|
|
int len = OMPI_AFFINITY_STRING_MAX;
|
|
hwloc_obj_t socket, core, pu;
|
|
|
|
str[0] = '\0';
|
|
|
|
/* Iterate over all existing sockets */
|
|
for (socket = hwloc_get_obj_by_type(opal_hwloc_topology,
|
|
HWLOC_OBJ_SOCKET, 0);
|
|
NULL != socket;
|
|
socket = socket->next_cousin) {
|
|
strncat(str, "[", len - strlen(str));
|
|
|
|
/* Iterate over all existing cores in this socket */
|
|
core_index = 0;
|
|
for (core = hwloc_get_obj_inside_cpuset_by_type(opal_hwloc_topology,
|
|
socket->cpuset,
|
|
HWLOC_OBJ_CORE, core_index);
|
|
NULL != core;
|
|
core = hwloc_get_obj_inside_cpuset_by_type(opal_hwloc_topology,
|
|
socket->cpuset,
|
|
HWLOC_OBJ_CORE, ++core_index)) {
|
|
if (core_index > 0) {
|
|
strncat(str, "/", len - strlen(str));
|
|
}
|
|
|
|
/* Iterate over all existing PUs in this core */
|
|
pu_index = 0;
|
|
for (pu = hwloc_get_obj_inside_cpuset_by_type(opal_hwloc_topology,
|
|
core->cpuset,
|
|
HWLOC_OBJ_PU, pu_index);
|
|
NULL != pu;
|
|
pu = hwloc_get_obj_inside_cpuset_by_type(opal_hwloc_topology,
|
|
core->cpuset,
|
|
HWLOC_OBJ_PU, ++pu_index)) {
|
|
strncat(str, ".", len - strlen(str));
|
|
}
|
|
}
|
|
strncat(str, "]", len - strlen(str));
|
|
}
|
|
|
|
return OMPI_SUCCESS;
|
|
}
|