From 52e81ee4b124172ffe99ebb056cd6cd40221b5cb Mon Sep 17 00:00:00 2001 From: Boris Karasev Date: Thu, 1 Feb 2018 16:55:41 +0200 Subject: [PATCH] rmaps: fixed the ordering of `mpirun` target nodes Fixed the desync of job-nodelists between mpirun and orted daemons. The issue was observed when using RSH launching because the user can provide the nodes in an arbitrary order relative to the HNP placement. The mpirun process propagates the daemon's nodelist order to the nodes. The problem was that the HNP itself assembled the nodelist based on the user-provided order. As a result, rank assignment was calculated differently on orted and mpirun. Consider the following example: * User launches mpirun on node cn2. * Hostlist is cn1,cn2,cn3,cn4; ppn=1 * mpirun is passing hostlist cn[2:2,1,3-4]@0(4) to orteds As a result, mpirun will assign rank 0 to cn1 while orted will assign rank 0 to cn2 (because orted sees cn2 as the first element in the node list) Signed-off-by: Boris Karasev --- orte/mca/rmaps/base/rmaps_base_support_fns.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/orte/mca/rmaps/base/rmaps_base_support_fns.c b/orte/mca/rmaps/base/rmaps_base_support_fns.c index 88dfa07bce..2b8c248de8 100644 --- a/orte/mca/rmaps/base/rmaps_base_support_fns.c +++ b/orte/mca/rmaps/base/rmaps_base_support_fns.c @@ -253,13 +253,12 @@ int orte_rmaps_base_get_target_nodes(opal_list_t *allocated_nodes, orte_std_cntr /* find the nodes in our node array and assemble them * in daemon order if the vm was launched */ - while (NULL != (item = opal_list_remove_first(&nodes))) { - nptr = (orte_node_t*)item; + for (i=0; i < orte_node_pool->size; i++) { nd = NULL; - for (i=0; i < orte_node_pool->size; i++) { - if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) { - continue; - } + if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) { + continue; + } + OPAL_LIST_FOREACH_SAFE(nptr, next, &nodes, orte_node_t) { if (0 != 
strcmp(node->name, nptr->name)) { OPAL_OUTPUT_VERBOSE((10, orte_rmaps_base_framework.framework_output, "NODE %s DOESNT MATCH NODE %s", @@ -332,8 +331,9 @@ int orte_rmaps_base_get_target_nodes(opal_list_t *allocated_nodes, orte_std_cntr /* reset us back to the end for the next node */ nd = (orte_node_t*)opal_list_get_last(allocated_nodes); } + opal_list_remove_item(&nodes, (opal_list_item_t*)nptr); + OBJ_RELEASE(nptr); } - OBJ_RELEASE(nptr); } OBJ_DESTRUCT(&nodes); /* now prune for usage and compute total slots */