1
1
openmpi/ompi/mpiext/affinity/c/mpiext_affinity_str.c

421 строка
15 KiB
C

/*
* Copyright (c) 2004-2009 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2010-2012 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*
* Simple routine to expose three things to the MPI process:
*
* 1. What processor(s) Open MPI bound this process to
* 2. What processor(s) this process is bound to
* 3. What processor(s) exist on this host
*
* Note that 1 and 2 may be different!
*/
#include "ompi_config.h"
#include <stdio.h>
#include <string.h>
#include "opal/mca/hwloc/base/base.h"
#include "opal/runtime/opal.h"
#include "ompi/communicator/communicator.h"
#include "ompi/errhandler/errhandler.h"
#include "ompi/mpi/c/bindings.h"
#include "ompi/mpiext/affinity/c/mpiext_affinity_c.h"
static const char FUNC_NAME[] = "OMPI_Affinity";
static const char ompi_nobind_str[] = "Open MPI did not bind this process";
static const char not_bound_str[] = "Not bound (i.e., bound to all processors)";
static int get_rsrc_ompi_bound(char str[OMPI_AFFINITY_STRING_MAX]);
static int get_rsrc_current_binding(char str[OMPI_AFFINITY_STRING_MAX]);
static int get_rsrc_exists(char str[OMPI_AFFINITY_STRING_MAX]);
static int get_layout_ompi_bound(char str[OMPI_AFFINITY_STRING_MAX]);
static int get_layout_current_binding(char str[OMPI_AFFINITY_STRING_MAX]);
static int get_layout_exists(char str[OMPI_AFFINITY_STRING_MAX]);
/*---------------------------------------------------------------------------*/
int OMPI_Affinity_str(ompi_affinity_fmt_t fmt_type,
char ompi_bound[OMPI_AFFINITY_STRING_MAX],
char current_binding[OMPI_AFFINITY_STRING_MAX],
char exists[OMPI_AFFINITY_STRING_MAX])
{
int ret;
memset(ompi_bound, 0, OMPI_AFFINITY_STRING_MAX);
memset(current_binding, 0, OMPI_AFFINITY_STRING_MAX);
/* If we have no hwloc support, return nothing */
if (NULL == opal_hwloc_topology) {
strncpy(ompi_bound, "Not supported", OMPI_AFFINITY_STRING_MAX);
strncpy(current_binding, "Not supported", OMPI_AFFINITY_STRING_MAX);
strncpy(exists, "Not supported", OMPI_AFFINITY_STRING_MAX);
return MPI_SUCCESS;
}
/* Otherwise, return useful information */
switch (fmt_type) {
case OMPI_AFFINITY_RSRC_STRING_FMT:
if (OMPI_SUCCESS != (ret = get_rsrc_ompi_bound(ompi_bound)) ||
OMPI_SUCCESS != (ret = get_rsrc_current_binding(current_binding)) ||
OMPI_SUCCESS != (ret = get_rsrc_exists(exists))) {
return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, ret, FUNC_NAME);
}
break;
case OMPI_AFFINITY_LAYOUT_FMT:
if (OMPI_SUCCESS != (ret = get_layout_ompi_bound(ompi_bound)) ||
OMPI_SUCCESS != (ret = get_layout_current_binding(current_binding)) ||
OMPI_SUCCESS != (ret = get_layout_exists(exists))) {
return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, ret, FUNC_NAME);
}
break;
default:
return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, FUNC_NAME);
}
return MPI_SUCCESS;
}
/*---------------------------------------------------------------------------*/
/*
* Where did OMPI bind this process? (prettyprint)
*/
static int get_rsrc_ompi_bound(char str[OMPI_AFFINITY_STRING_MAX])
{
int ret;
/* If OMPI did not bind, indicate that */
if (!ompi_rte_proc_is_bound) {
strncpy(str, ompi_nobind_str, OMPI_AFFINITY_STRING_MAX - 1);
return OMPI_SUCCESS;
}
if (NULL == orte_proc_applied_binding) {
ret = OPAL_ERR_NOT_BOUND;
} else {
ret = opal_hwloc_base_cset2str(str, OMPI_AFFINITY_STRING_MAX,
opal_hwloc_topology,
orte_proc_applied_binding);
}
if (OPAL_ERR_NOT_BOUND == ret) {
strncpy(str, not_bound_str, OMPI_AFFINITY_STRING_MAX - 1);
ret = OMPI_SUCCESS;
}
return ret;
}
/*
* Where is this process currently bound? (prettyprint)
*/
static int get_rsrc_current_binding(char str[OMPI_AFFINITY_STRING_MAX])
{
int ret;
hwloc_obj_t root;
hwloc_cpuset_t boundset, rootset;
bool bound = false;
/* get our root object */
root = hwloc_get_root_obj(opal_hwloc_topology);
rootset = opal_hwloc_base_get_available_cpus(opal_hwloc_topology, root);
/* get our bindings */
boundset = hwloc_bitmap_alloc();
if (hwloc_get_cpubind(opal_hwloc_topology, boundset,
HWLOC_CPUBIND_PROCESS) < 0) {
/* we are NOT bound if get_cpubind fails, nor can we be bound
- the environment does not support it */
bound = false;
} else {
/* we are bound if the two cpusets are not equal, or if there
is only ONE PU available to us */
if (0 != hwloc_bitmap_compare(boundset, rootset) ||
opal_hwloc_base_single_cpu(rootset) ||
opal_hwloc_base_single_cpu(boundset)) {
bound = true;
}
}
/* If we are not bound, indicate that */
if (!bound) {
strncat(str, not_bound_str, OMPI_AFFINITY_STRING_MAX - 1);
ret = OMPI_SUCCESS;
}
/* If we are bound, print it out */
else {
ret = opal_hwloc_base_cset2str(str, OMPI_AFFINITY_STRING_MAX,
opal_hwloc_topology,
boundset);
if (OPAL_ERR_NOT_BOUND == ret) {
strncpy(str, not_bound_str, OMPI_AFFINITY_STRING_MAX - 1);
ret = OMPI_SUCCESS;
}
}
hwloc_bitmap_free(boundset);
return ret;
}
/*
* Prettyprint a list of all available sockets and cores. Note that
* this is *everything* -- not just the ones that are available to
* this process.
*/
static int get_rsrc_exists(char str[OMPI_AFFINITY_STRING_MAX])
{
bool first = true;
int i, num_cores, num_pus;
char tmp[BUFSIZ];
const int stmp = sizeof(tmp) - 1;
hwloc_obj_t socket, core, c2;
str[0] = '\0';
for (socket = hwloc_get_obj_by_type(opal_hwloc_topology,
HWLOC_OBJ_SOCKET, 0);
NULL != socket; socket = socket->next_cousin) {
/* If this isn't the first socket, add a delimiter */
if (!first) {
strncat(str, "; ", OMPI_AFFINITY_STRING_MAX - strlen(str));
}
first = false;
snprintf(tmp, stmp, "socket %d has ", socket->os_index);
strncat(str, tmp, OMPI_AFFINITY_STRING_MAX - strlen(str));
/* Find out how many cores are inside this socket, and get an
object pointing to the first core. Also count how many PUs
are in the first core. */
num_cores = hwloc_get_nbobjs_inside_cpuset_by_type(opal_hwloc_topology,
socket->cpuset,
HWLOC_OBJ_CORE);
core = hwloc_get_obj_inside_cpuset_by_type(opal_hwloc_topology,
socket->cpuset,
HWLOC_OBJ_CORE, 0);
if (NULL != core) {
num_pus =
hwloc_get_nbobjs_inside_cpuset_by_type(opal_hwloc_topology,
core->cpuset,
HWLOC_OBJ_PU);
/* Only 1 core */
if (1 == num_cores) {
strncat(str, "1 core with ",
OMPI_AFFINITY_STRING_MAX - strlen(str));
if (1 == num_pus) {
strncat(str, "1 hwt",
OMPI_AFFINITY_STRING_MAX - strlen(str));
} else {
snprintf(tmp, stmp, "%d hwts", num_pus);
strncat(str, tmp, OMPI_AFFINITY_STRING_MAX - strlen(str));
}
}
/* Multiple cores */
else {
bool same = true;
snprintf(tmp, stmp, "%d cores", num_cores);
strncat(str, tmp, OMPI_AFFINITY_STRING_MAX - strlen(str));
/* Do all the cores have the same number of PUs? */
for (c2 = core; NULL != c2; c2 = c2->next_cousin) {
if (hwloc_get_nbobjs_inside_cpuset_by_type(opal_hwloc_topology,
core->cpuset,
HWLOC_OBJ_PU) !=
num_pus) {
same = false;
break;
}
}
/* Yes, they all have the same number of PUs */
if (same) {
snprintf(tmp, stmp, ", each with %d hwt", num_pus);
strncat(str, tmp, OMPI_AFFINITY_STRING_MAX - strlen(str));
if (num_pus != 1) {
strncat(str, "s", OMPI_AFFINITY_STRING_MAX - strlen(str));
}
}
/* No, they have differing numbers of PUs */
else {
bool first = true;
strncat(str, "with (", OMPI_AFFINITY_STRING_MAX - strlen(str));
for (c2 = core; NULL != c2; c2 = c2->next_cousin) {
if (!first) {
strncat(str, ", ",
OMPI_AFFINITY_STRING_MAX - strlen(str));
}
first = false;
i = hwloc_get_nbobjs_inside_cpuset_by_type(opal_hwloc_topology,
core->cpuset,
HWLOC_OBJ_PU);
snprintf(tmp, stmp, "%d", i);
strncat(str, tmp, OMPI_AFFINITY_STRING_MAX - strlen(str));
}
strncat(str, ") hwts",
OMPI_AFFINITY_STRING_MAX - strlen(str));
}
}
}
}
return OMPI_SUCCESS;
}
/*---------------------------------------------------------------------------*/
/*
* Where did OMPI bind this process? (layout string)
*/
static int get_layout_ompi_bound(char str[OMPI_AFFINITY_STRING_MAX])
{
int ret;
/* If OMPI did not bind, indicate that */
if (!ompi_rte_proc_is_bound) {
strncpy(str, ompi_nobind_str, OMPI_AFFINITY_STRING_MAX - 1);
return OMPI_SUCCESS;
}
/* Find out what OMPI bound us to and prettyprint it */
if (NULL == orte_proc_applied_binding) {
ret = OPAL_ERR_NOT_BOUND;
} else {
ret = opal_hwloc_base_cset2mapstr(str, OMPI_AFFINITY_STRING_MAX,
opal_hwloc_topology,
orte_proc_applied_binding);
}
if (OPAL_ERR_NOT_BOUND == ret) {
strncpy(str, not_bound_str, OMPI_AFFINITY_STRING_MAX - 1);
ret = OMPI_SUCCESS;
}
return ret;
}
/*
* Where is this process currently bound? (layout string)
*/
static int get_layout_current_binding(char str[OMPI_AFFINITY_STRING_MAX])
{
int ret;
hwloc_obj_t root;
hwloc_cpuset_t boundset, rootset;
bool bound = false;
/* get our root object */
root = hwloc_get_root_obj(opal_hwloc_topology);
rootset = opal_hwloc_base_get_available_cpus(opal_hwloc_topology, root);
/* get our bindings */
boundset = hwloc_bitmap_alloc();
if (hwloc_get_cpubind(opal_hwloc_topology, boundset,
HWLOC_CPUBIND_PROCESS) < 0) {
/* we are NOT bound if get_cpubind fails, nor can we be bound
- the environment does not support it */
bound = false;
} else {
/* we are bound if the two cpusets are not equal, or if there
is only ONE PU available to us */
if (0 != hwloc_bitmap_compare(boundset, rootset) ||
opal_hwloc_base_single_cpu(rootset) ||
opal_hwloc_base_single_cpu(boundset)) {
bound = true;
}
}
/* If we are not bound, indicate that */
if (!bound) {
strncat(str, not_bound_str, OMPI_AFFINITY_STRING_MAX - 1);
ret = OMPI_SUCCESS;
}
/* If we are bound, print it out */
else {
ret = opal_hwloc_base_cset2mapstr(str, OMPI_AFFINITY_STRING_MAX,
opal_hwloc_topology,
boundset);
if (OPAL_ERR_NOT_BOUND == ret) {
strncpy(str, not_bound_str, OMPI_AFFINITY_STRING_MAX - 1);
ret = OMPI_SUCCESS;
}
}
hwloc_bitmap_free(boundset);
return ret;
}
/*
* Make a layout string of all available sockets and cores. Note that
* this is *everything* -- not just the ones that are available to
* this process.
*
* Example: [../..]
* Key: [] - signifies socket
* / - signifies core
* . - signifies PU
*/
static int get_layout_exists(char str[OMPI_AFFINITY_STRING_MAX])
{
int core_index, pu_index;
int len = OMPI_AFFINITY_STRING_MAX;
hwloc_obj_t socket, core, pu;
str[0] = '\0';
/* Iterate over all existing sockets */
for (socket = hwloc_get_obj_by_type(opal_hwloc_topology,
HWLOC_OBJ_SOCKET, 0);
NULL != socket;
socket = socket->next_cousin) {
strncat(str, "[", len - strlen(str));
/* Iterate over all existing cores in this socket */
core_index = 0;
for (core = hwloc_get_obj_inside_cpuset_by_type(opal_hwloc_topology,
socket->cpuset,
HWLOC_OBJ_CORE, core_index);
NULL != core;
core = hwloc_get_obj_inside_cpuset_by_type(opal_hwloc_topology,
socket->cpuset,
HWLOC_OBJ_CORE, ++core_index)) {
if (core_index > 0) {
strncat(str, "/", len - strlen(str));
}
/* Iterate over all existing PUs in this core */
pu_index = 0;
for (pu = hwloc_get_obj_inside_cpuset_by_type(opal_hwloc_topology,
core->cpuset,
HWLOC_OBJ_PU, pu_index);
NULL != pu;
pu = hwloc_get_obj_inside_cpuset_by_type(opal_hwloc_topology,
core->cpuset,
HWLOC_OBJ_PU, ++pu_index)) {
strncat(str, ".", len - strlen(str));
}
}
strncat(str, "]", len - strlen(str));
}
return OMPI_SUCCESS;
}