1
1

Add verbose output to nidmap code for debugging as this is a new, and sometimes fragile, feature

Signed-off-by: Ralph Castain <rhc@open-mpi.org>
Этот коммит содержится в:
Ralph Castain 2017-05-10 12:40:02 -07:00
родитель 911961ee21
Коммит 55f4b825af
3 изменённых файлов: 48 добавлений и 1 удалений

Просмотреть файл

@ -13,7 +13,7 @@
* reserved.
* Copyright (c) 2007-2012 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2007-2008 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2016 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
*
@ -48,6 +48,7 @@
#include "orte/mca/schizo/base/base.h"
#include "orte/util/listener.h"
#include "orte/util/name_fns.h"
#include "orte/util/nidmap.h"
#include "orte/util/proc_info.h"
#include "orte/util/error_strings.h"
#include "orte/orted/pmix/pmix_server.h"
@ -216,6 +217,7 @@ int orte_init(int* pargc, char*** pargv, orte_proc_type_t flags)
if (ORTE_PROC_IS_DAEMON || ORTE_PROC_IS_HNP) {
/* let the pmix server register params */
pmix_server_register_params();
orte_util_nidmap_init();
OPAL_TIMING_ENV_NEXT(tmng, "pmix_server_register_params");
}

Просмотреть файл

@ -74,6 +74,27 @@
#include "orte/util/nidmap.h"
/* Verbosity level for nidmap debug messages. Stays at -1 until
 * orte_util_nidmap_init() registers it as the "nidmap_verbose" MCA variable. */
static int orte_nidmap_verbosity = -1;
/* Output stream id handed to opal_output_verbose() by the nidmap routines.
 * Stays at -1 until orte_util_nidmap_init() opens the stream. */
static int orte_nidmap_output = -1;
/*
 * Set up debug output for the nidmap utilities.
 *
 * Registers the "nidmap_verbose" MCA variable (stored in
 * orte_nidmap_verbosity), opens an output stream for nidmap messages
 * (stored in orte_nidmap_output), and applies the verbosity level to that
 * stream when the level is positive.
 */
void orte_util_nidmap_init(void)
{
    /* Reset to the "off" level before registering; presumably the
     * registration call updates it when the user sets the parameter. */
    orte_nidmap_verbosity = -1;
    (void) mca_base_var_register("orte", "orte", NULL, "nidmap_verbose",
                                 "Verbosity level for ORTE debug messages in the nidmap utilities",
                                 MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
                                 OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL,
                                 &orte_nidmap_verbosity);

    /* The stream is opened unconditionally, even when verbosity stays off */
    orte_nidmap_output = opal_output_open(NULL);

    /* Nothing more to do unless a positive verbosity was requested */
    if (orte_nidmap_verbosity <= 0) {
        return;
    }

    opal_output_set_verbosity(orte_nidmap_output, orte_nidmap_verbosity);
}
int orte_util_build_daemon_nidmap(void)
{
int i;
@ -585,6 +606,9 @@ int orte_util_encode_nodemap(opal_buffer_t *buffer)
OBJ_RELEASE(rng);
}
OPAL_LIST_DESTRUCT(&slots);
opal_output_verbose(1, orte_nidmap_output,
"%s SLOT ASSIGNMENTS: %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), tmp);
/* pack the string */
if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &tmp, 1, OPAL_STRING))) {
ORTE_ERROR_LOG(rc);
@ -610,6 +634,9 @@ int orte_util_encode_nodemap(opal_buffer_t *buffer)
OPAL_LIST_DESTRUCT(&flags);
/* pack the string */
opal_output_verbose(1, orte_nidmap_output,
"%s FLAG ASSIGNMENTS: %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), tmp);
if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &tmp, 1, OPAL_STRING))) {
ORTE_ERROR_LOG(rc);
return rc;
@ -652,6 +679,9 @@ int orte_util_encode_nodemap(opal_buffer_t *buffer)
}
if (NULL == rng->t) {
/* need to account for NULL topology */
opal_output_verbose(1, orte_nidmap_output,
"%s PACKING NULL TOPOLOGY",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
tmp2 = NULL;
if (ORTE_SUCCESS != (rc = opal_dss.pack(&bucket, &tmp2, 1, OPAL_STRING))) {
ORTE_ERROR_LOG(rc);
@ -662,6 +692,9 @@ int orte_util_encode_nodemap(opal_buffer_t *buffer)
return rc;
}
} else {
opal_output_verbose(1, orte_nidmap_output,
"%s PACKING TOPOLOGY: %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), rng->t->sig);
/* pack this topology string */
if (ORTE_SUCCESS != (rc = opal_dss.pack(&bucket, &rng->t->sig, 1, OPAL_STRING))) {
ORTE_ERROR_LOG(rc);
@ -685,6 +718,9 @@ int orte_util_encode_nodemap(opal_buffer_t *buffer)
}
OPAL_LIST_DESTRUCT(&topos);
/* pack the string */
opal_output_verbose(1, orte_nidmap_output,
"%s TOPOLOGY ASSIGNMENTS: %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), tmp);
if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &tmp, 1, OPAL_STRING))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&bucket);
@ -1011,6 +1047,9 @@ int orte_util_decode_daemon_nodemap(opal_buffer_t *buffer)
if (NULL == bptr) {
/* our topology is first in the array */
t2 = (orte_topology_t*)opal_pointer_array_get_item(orte_node_topologies, 0);
opal_output_verbose(1, orte_nidmap_output,
"%s ASSIGNING ALL TOPOLOGIES TO: %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), t2->sig);
for (n=0; n < orte_node_pool->size; n++) {
if (NULL != (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, n))) {
if (NULL == node->topology) {
@ -1077,6 +1116,10 @@ int orte_util_decode_daemon_nodemap(opal_buffer_t *buffer)
if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, n+offset))) {
continue;
}
opal_output_verbose(1, orte_nidmap_output,
"%s ASSIGNING NODE %s WITH TOPO: %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
node->name, t2->sig);
if (NULL == node->topology) {
OBJ_RETAIN(t2);
node->topology = t2;

Просмотреть файл

@ -44,6 +44,8 @@ BEGIN_C_DECLS
#define ORTE_NON_CONTIG_NODE_CMD 0x02
/* Register the nidmap verbosity parameter and open the nidmap debug output stream */
ORTE_DECLSPEC void orte_util_nidmap_init(void);
ORTE_DECLSPEC int orte_util_nidmap_create(char **regex);
ORTE_DECLSPEC int orte_util_nidmap_parse(char *regex);