From 61a71e25effe61a121290a7913f89f2996f63d17 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Tue, 14 Mar 2017 10:06:43 -0700 Subject: [PATCH] Ensure the backend daemons know if we are in a managed allocation and if the HNP was included in the allocation Signed-off-by: Ralph Castain --- orte/mca/rmaps/base/rmaps_base_support_fns.c | 8 ++-- orte/util/nidmap.c | 48 ++++++++++++++++++++ 2 files changed, 52 insertions(+), 4 deletions(-) diff --git a/orte/mca/rmaps/base/rmaps_base_support_fns.c b/orte/mca/rmaps/base/rmaps_base_support_fns.c index 2b1a1ccdc3..abf8e8a956 100644 --- a/orte/mca/rmaps/base/rmaps_base_support_fns.c +++ b/orte/mca/rmaps/base/rmaps_base_support_fns.c @@ -12,7 +12,7 @@ * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011-2012 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. * Copyright (c) 2016 IBM Corporation. All rights reserved. * $COPYRIGHT$ * @@ -340,7 +340,7 @@ int orte_rmaps_base_get_target_nodes(opal_list_t *allocated_nodes, orte_std_cntr goto complete; } - addknown: + addknown: /* if the hnp was allocated, include it unless flagged not to */ if (orte_hnp_is_allocated && !(ORTE_GET_MAPPING_DIRECTIVE(policy) & ORTE_MAPPING_NO_USE_LOCAL)) { if (NULL != (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, 0))) { @@ -416,7 +416,7 @@ int orte_rmaps_base_get_target_nodes(opal_list_t *allocated_nodes, orte_std_cntr ORTE_FLAG_UNSET(node, ORTE_NODE_FLAG_MAPPED); } if (NULL == nd || NULL == nd->daemon || - NULL == node->daemon || + NULL == node->daemon || nd->daemon->name.vpid < node->daemon->name.vpid) { /* just append to end */ opal_list_append(allocated_nodes, &node->super); @@ -476,7 +476,7 @@ int orte_rmaps_base_get_target_nodes(opal_list_t *allocated_nodes, orte_std_cntr ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (int)opal_list_get_size(allocated_nodes))); - complete: + complete: /* remove all nodes that are already at max usage, and * compute the total number of allocated slots while * we do so */ diff --git a/orte/util/nidmap.c b/orte/util/nidmap.c index 6a77aa464e..be0437bf20 100644 --- a/orte/util/nidmap.c +++ b/orte/util/nidmap.c @@ -186,6 +186,7 @@ int orte_util_encode_nodemap(opal_buffer_t *buffer) char **regexargs = NULL, *tmp, *tmp2; orte_node_t *nptr; int rc; + uint8_t ui8; /* setup the list of results */ OBJ_CONSTRUCT(&nodenms, opal_list_t); @@ -594,6 +595,28 @@ int orte_util_encode_nodemap(opal_buffer_t *buffer) free(tmp); } + /* pack a flag indicating if the HNP was included in the allocation */ + if (orte_hnp_is_allocated) { + ui8 = 1; + } else { + ui8 = 0; + } + if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &ui8, 1, OPAL_UINT8))) { + ORTE_ERROR_LOG(rc); + return rc; + } + + /* pack a flag indicating if we are in a managed allocation */ + if (orte_managed_allocation) { + ui8 = 1; + } else { + ui8 = 0; + } + if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &ui8, 1, OPAL_UINT8))) { + ORTE_ERROR_LOG(rc); + return rc; + } + /* handle the topologies - as the most common case by far * is to have homogeneous topologies, we only send them * if something is different */ @@ -684,6 +707,7 @@ int orte_util_decode_daemon_nodemap(opal_buffer_t *buffer) opal_buffer_t *bptr=NULL; orte_topology_t *t; orte_regex_range_t *rng, *drng, *srng, *frng; + uint8_t ui8; /* unpack the node regex */ n = 1; @@ -739,6 +763,30 @@ int orte_util_decode_daemon_nodemap(opal_buffer_t *buffer) goto cleanup; } + /* unpack the flag indicating if the HNP was allocated */ + n = 1; + if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &ui8, &n, OPAL_UINT8))) { + ORTE_ERROR_LOG(rc); + goto cleanup; + } + if (0 == ui8) { + orte_hnp_is_allocated = false; + } else { + orte_hnp_is_allocated = true; + } + + /* unpack the flag indicating we are in a managed allocation */ + n = 1; + if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &ui8, &n, OPAL_UINT8))) { + ORTE_ERROR_LOG(rc); + goto cleanup; + } + if (0 == ui8) { + orte_managed_allocation = false; + } else { + orte_managed_allocation = true; + } + /* unpack the topos regex - this may not have been * provided (e.g., for a homogeneous machine) */ n = 1;