diff --git a/opal/class/Makefile.am b/opal/class/Makefile.am index 1074a08621..5b8bc27fa9 100644 --- a/opal/class/Makefile.am +++ b/opal/class/Makefile.am @@ -21,6 +21,7 @@ # Source code files headers += \ + class/opal_bitmap.h \ class/opal_free_list.h \ class/opal_hash_table.h \ class/opal_list.h \ @@ -31,6 +32,7 @@ headers += \ class/opal_value_array.h libopen_pal_la_SOURCES += \ + class/opal_bitmap.c \ class/opal_free_list.c \ class/opal_hash_table.c \ class/opal_list.c \ diff --git a/opal/class/opal_bitmap.c b/opal/class/opal_bitmap.c new file mode 100644 index 0000000000..18ae6c51e7 --- /dev/null +++ b/opal/class/opal_bitmap.c @@ -0,0 +1,229 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "opal_config.h" +#include "opal/constants.h" + +#include "opal/class/opal_bitmap.h" + + +#define SIZE_OF_CHAR ((int) (sizeof(char) * 8)) + +static void opal_bitmap_construct(opal_bitmap_t *bm); +static void opal_bitmap_destruct(opal_bitmap_t *bm); + +OBJ_CLASS_INSTANCE(opal_bitmap_t, opal_object_t, + opal_bitmap_construct, opal_bitmap_destruct); + + +static void +opal_bitmap_construct(opal_bitmap_t *bm) +{ + bm->array_size = 0; + bm->bitmap = NULL; +} + + +static void +opal_bitmap_destruct(opal_bitmap_t *bm) +{ + if (NULL != bm->bitmap) { + free(bm->bitmap); + } +} + + +int +opal_bitmap_init(opal_bitmap_t *bm, int size) +{ + int actual_size; + + if ((size <= 0) || (NULL == bm)) { + return OPAL_ERR_BAD_PARAM; + } + + actual_size = size / SIZE_OF_CHAR; + actual_size += (size % SIZE_OF_CHAR == 0) ? 0 : 1; + bm->array_size = actual_size; + bm->bitmap = (unsigned char *) malloc(actual_size); + if (NULL == bm->bitmap) { + return OPAL_ERR_OUT_OF_RESOURCE; + } + + opal_bitmap_clear_all_bits(bm); + return OPAL_SUCCESS; +} + + +int +opal_bitmap_set_bit(opal_bitmap_t *bm, int bit) +{ + int index, offset, new_size; + size_t new_size_large; + + if ((bit < 0) || (NULL == bm)) { + return OPAL_ERR_BAD_PARAM; + } + + index = bit / SIZE_OF_CHAR; + offset = bit % SIZE_OF_CHAR; + + if (index >= bm->array_size) { + unsigned char *new_bitmap; /* holds the realloc result until it is known to be valid */ + /* We need to allocate more space for the bitmap, since we are + out of range. We don't throw any error here, because this is + valid and we simply expand the bitmap */ + + new_size_large = (index / bm->array_size + 1 ) * bm->array_size; + + /* Note that new_size is guaranteed to be <= + INT_MAX, which is guaranteed to fit in a + [signed] int. */ + + new_size = (int) new_size_large; + + /* New size is just a multiple of the original size to fit in + the index. Grow through a temporary so a failed realloc keeps the old bitmap valid.
*/ + + new_bitmap = (unsigned char *) realloc(bm->bitmap, (size_t) new_size); + if (NULL == new_bitmap) { + return OPAL_ERR_OUT_OF_RESOURCE; + } + bm->bitmap = new_bitmap; + /* zero out the new elements */ + memset(&bm->bitmap[bm->array_size], 0, new_size - bm->array_size); + + /* Update the array_size */ + bm->array_size = new_size; + } + + /* Now set the bit */ + bm->bitmap[index] |= (1 << offset); + + return OPAL_SUCCESS; +} + + +int +opal_bitmap_clear_bit(opal_bitmap_t *bm, int bit) +{ + int index, offset; + + if ((bit < 0) || NULL == bm || (bit >= (bm->array_size * SIZE_OF_CHAR))) { + return OPAL_ERR_BAD_PARAM; + } + + index = bit / SIZE_OF_CHAR; + offset = bit % SIZE_OF_CHAR; + + if (index >= bm->array_size) { + return OPAL_ERR_BAD_PARAM; + } + + bm->bitmap[index] &= ~(1 << offset); + return OPAL_SUCCESS; +} + + +int +opal_bitmap_is_set_bit(opal_bitmap_t *bm, int bit) +{ + int index, offset; + + if ((bit < 0) || NULL == bm || (bit >= (bm->array_size * SIZE_OF_CHAR))) { + return OPAL_ERR_BAD_PARAM; + } + + index = bit / SIZE_OF_CHAR; + offset = bit % SIZE_OF_CHAR; + + if (index >= bm->array_size) { + return OPAL_ERR_BAD_PARAM; + } + + if (0 != (bm->bitmap[index] & (1 << offset))) { + return (int) true; + } + + return (int) false; +} + + +int +opal_bitmap_clear_all_bits(opal_bitmap_t *bm) +{ + if (NULL == bm) { + return OPAL_ERR_BAD_PARAM; + } + + memset(bm->bitmap, 0, bm->array_size); + return OPAL_SUCCESS; +} + + +int +opal_bitmap_set_all_bits(opal_bitmap_t *bm) +{ + if (NULL == bm) { + return OPAL_ERR_BAD_PARAM; + } + + memset(bm->bitmap, 0xff, bm->array_size); + + return OPAL_SUCCESS; +} + + +int +opal_bitmap_find_and_set_first_unset_bit(opal_bitmap_t *bm, int *position) +{ + int i = 0; + unsigned char temp; + unsigned char all_ones = 0xff; + + if (NULL == bm) { + return OPAL_ERR_BAD_PARAM; + } + + /* Neglect all which don't have an unset bit */ + *position = 0; + while((i < bm->array_size) && (bm->bitmap[i] == all_ones)) { + ++i; + } + + if (i == bm->array_size) { + /* increase the
bitmap size then */ + *position = bm->array_size * SIZE_OF_CHAR; + return opal_bitmap_set_bit(bm, *position); + } + + /* This one has an unset bit, find its bit number */ + + temp = bm->bitmap[i]; + while (temp & 0x1) { + ++(*position); + temp >>= 1; + } + + /* Now set the bit number */ + bm->bitmap[i] |= (bm->bitmap[i] + 1); + + (*position) += i * SIZE_OF_CHAR; + return OPAL_SUCCESS; +} diff --git a/opal/class/opal_bitmap.h b/opal/class/opal_bitmap.h new file mode 100644 index 0000000000..4f03549ab3 --- /dev/null +++ b/opal/class/opal_bitmap.h @@ -0,0 +1,152 @@ +/* -*- Mode: C; c-basic-offset:4 ; -*- */ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2007 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + */ + +/** @file + * + * A bitmap implementation. The bits start off with 0, so this bitmap + * has bits numbered as bit 0, bit 1, bit 2 and so on. This bitmap + * has auto-expansion capabilities, that is once the size is set + * during init, it can be automatically expanded by setting the bit + * beyond the current size. But note, this is allowed just when the + * bit is set -- so the valid functions are set_bit and + * find_and_set_bit. Other functions like clear, if passed a bit + * outside the initialized range will result in an error. 
+ * + */ + +#ifndef OPAL_BITMAP_H +#define OPAL_BITMAP_H + +#include "opal_config.h" + +#include <string.h> + +#include "opal/class/opal_object.h" + +BEGIN_C_DECLS + +struct opal_bitmap_t { + opal_object_t super; /**< Subclass of opal_object_t */ + unsigned char *bitmap; /**< The actual bitmap array of characters */ + int array_size; /**< The actual array size that maintains the bitmap */ +}; + +typedef struct opal_bitmap_t opal_bitmap_t; + +OPAL_DECLSPEC OBJ_CLASS_DECLARATION(opal_bitmap_t); + +/** + * Initializes the bitmap and sets its size. This must be called + * before the bitmap can be actually used + * + * @param bm The input bitmap (IN) + * @param size The initial size of the bitmap in terms of bits (IN) + * @return OPAL error code or success + * + */ +OPAL_DECLSPEC int opal_bitmap_init (opal_bitmap_t *bm, int size); + + +/** + * Set a bit of the bitmap. If the bit asked for is beyond the current + * size of the bitmap, then the bitmap is extended to accommodate the + * bit + * + * @param bm The input bitmap (IN) + * @param bit The bit which is to be set (IN) + * @return OPAL error code or success + * + */ +OPAL_DECLSPEC int opal_bitmap_set_bit(opal_bitmap_t *bm, int bit); + + +/** + * Clear/unset a bit of the bitmap.
If the bit is beyond the current + * size of the bitmap, an error is returned + * + * @param bm The input bitmap (IN) + * @param bit The bit which is to be cleared (IN) + * @return OPAL error code if the bit is out of range, else success + * + */ +OPAL_DECLSPEC int opal_bitmap_clear_bit(opal_bitmap_t *bm, int bit); + + +/** + * Find out if a bit is set in the bitmap + * + * @param bm The input bitmap (IN) + * @param bit The bit which is to be checked (IN) + * @return OPAL error code if the bit is out of range + * 1 if the bit is set + * 0 if the bit is not set + * + */ +OPAL_DECLSPEC int opal_bitmap_is_set_bit(opal_bitmap_t *bm, int bit); + + +/** + * Find the first clear bit in the bitmap and set it + * + * @param bm The input bitmap (IN) + * @param position Position of the first clear bit (OUT) + * + * @return OPAL_SUCCESS on success, OPAL error code otherwise + */ +OPAL_DECLSPEC int opal_bitmap_find_and_set_first_unset_bit(opal_bitmap_t *bm, + int *position); + + +/** + * Clear all bits in the bitmap + * + * @param bm The input bitmap (IN) + * @return OPAL error code if bm is NULL + * + */ +OPAL_DECLSPEC int opal_bitmap_clear_all_bits(opal_bitmap_t *bm); + + +/** + * Set all bits in the bitmap + * @param bm The input bitmap (IN) + * @return OPAL error code if bm is NULL + * + */ +OPAL_DECLSPEC int opal_bitmap_set_all_bits(opal_bitmap_t *bm); + + +/** + * Gives the current size (number of bits) in the bitmap. This is the + * legal (accessible) number of bits + * + * @param bm The input bitmap (IN) + * @return Number of bits in the bitmap, or 0 if bm is NULL + * + */ +static inline int opal_bitmap_size(opal_bitmap_t *bm) +{ + return (NULL == bm) ?
0 : (bm->array_size * ((int) (sizeof(char) * 8))); +} + +END_C_DECLS + +#endif diff --git a/orte/mca/odls/base/odls_base_default_fns.c b/orte/mca/odls/base/odls_base_default_fns.c index 138ab09e13..ee43bc2a98 100644 --- a/orte/mca/odls/base/odls_base_default_fns.c +++ b/orte/mca/odls/base/odls_base_default_fns.c @@ -114,7 +114,7 @@ int orte_odls_base_default_get_add_procs_data(opal_buffer_t *data, /* get wireup info for daemons per the selected routing module */ wireup = OBJ_NEW(opal_buffer_t); - if (ORTE_SUCCESS != (rc = orte_routed.get_wireup_info(ORTE_PROC_MY_NAME->jobid, wireup))) { + if (ORTE_SUCCESS != (rc = orte_routed.get_wireup_info(wireup))) { ORTE_ERROR_LOG(rc); OBJ_RELEASE(wireup); return rc; @@ -507,12 +507,15 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *data, goto REPORT_ERROR; } } else { - /* is this proc on one of my children in the daemon tree? */ + /* is this proc on a daemon in a branch of the daemon tree + * that is below me? If so, then the daemon collective will + * receive a message via that direct child + */ for (item = opal_list_get_first(&daemon_tree); item != opal_list_get_end(&daemon_tree); item = opal_list_get_next(item)) { nm = (orte_namelist_t*)item; - if (nm->name.vpid == node->daemon) { + if (orte_routed.proc_is_below(nm->name.vpid, node->daemon)) { /* add to the count for collectives */ jobdat->num_participating++; /* remove this node from the tree so we don't count it again */ @@ -2205,10 +2208,11 @@ static int daemon_collective(orte_process_name_t *sender, opal_buffer_t *data) } if (jobdat->num_collected == jobdat->num_participating) { - /* if I am the HNP, then this is done! 
Handle it below */ + /* if I am the HNP, go process the results */ if (orte_process_info.hnp) { goto hnp_process; } + /* if I am not the HNP, send to my parent */ OBJ_CONSTRUCT(&buf, opal_buffer_t); /* add the requisite command header */ diff --git a/orte/mca/rml/oob/rml_oob_contact.c b/orte/mca/rml/oob/rml_oob_contact.c index 6245e51fa9..af5d9fe09c 100644 --- a/orte/mca/rml/oob/rml_oob_contact.c +++ b/orte/mca/rml/oob/rml_oob_contact.c @@ -51,8 +51,10 @@ orte_rml_oob_set_uri(const char* uri) char** uris; char** ptr; int rc = orte_rml_base_parse_uris(uri, &name, &uris); - if(rc != ORTE_SUCCESS) + if(rc != ORTE_SUCCESS) { + ORTE_ERROR_LOG(rc); return rc; + } for(ptr = uris; ptr != NULL && *ptr != NULL; ptr++) { orte_rml_oob_module.active_oob->oob_set_addr(&name, *ptr); diff --git a/orte/mca/rml/oob/rml_oob_ping.c b/orte/mca/rml/oob/rml_oob_ping.c index e4e7950a09..f6e21a09e3 100644 --- a/orte/mca/rml/oob/rml_oob_ping.c +++ b/orte/mca/rml/oob/rml_oob_ping.c @@ -11,6 +11,9 @@ #include "rml_oob.h" #include "opal/util/argv.h" + +#include "orte/mca/errmgr/errmgr.h" + #include "orte/mca/oob/oob.h" #include "orte/mca/oob/base/base.h" #include "orte/mca/rml/base/rml_contact.h" @@ -25,6 +28,7 @@ orte_rml_oob_ping(const char* uri, int rc; if (ORTE_SUCCESS != (rc = orte_rml_base_parse_uris(uri, &name, &uris))) { + ORTE_ERROR_LOG(rc); return rc; } diff --git a/orte/mca/routed/binomial/routed_binomial.c b/orte/mca/routed/binomial/routed_binomial.c index d8bbfa9846..9feceacb27 100644 --- a/orte/mca/routed/binomial/routed_binomial.c +++ b/orte/mca/routed/binomial/routed_binomial.c @@ -16,6 +16,7 @@ #include "opal/runtime/opal_progress.h" #include "opal/dss/dss.h" #include "opal/class/opal_hash_table.h" +#include "opal/class/opal_bitmap.h" #include "opal/util/bit_ops.h" #include "orte/mca/errmgr/errmgr.h" @@ -43,7 +44,8 @@ static int route_lost(const orte_process_name_t *route); static bool route_is_defined(const orte_process_name_t *target); static int update_routing_tree(void); 
static orte_vpid_t get_routing_tree(orte_jobid_t job, opal_list_t *children); -static int get_wireup_info(orte_jobid_t job, opal_buffer_t *buf); +static bool proc_is_below(orte_vpid_t root, orte_vpid_t target); +static int get_wireup_info(opal_buffer_t *buf); static int warmup_routes(void); #if OPAL_ENABLE_FT == 1 @@ -62,6 +64,7 @@ orte_routed_module_t orte_routed_binomial_module = { route_is_defined, update_routing_tree, get_routing_tree, + proc_is_below, get_wireup_info, #if OPAL_ENABLE_FT == 1 binomial_ft_event @@ -928,11 +931,31 @@ static bool route_is_defined(const orte_process_name_t *target) } /*************************************/ +typedef struct { + opal_list_item_t super; + orte_vpid_t vpid; + opal_bitmap_t relatives; +} orte_routed_tree_t; -static int binomial_tree(int rank, int parent, int me, int num_procs) +static void construct(orte_routed_tree_t *rt) +{ + rt->vpid = ORTE_VPID_INVALID; + OBJ_CONSTRUCT(&rt->relatives, opal_bitmap_t); +} +static void destruct(orte_routed_tree_t *rt) +{ + OBJ_DESTRUCT(&rt->relatives); +} +OBJ_CLASS_INSTANCE(orte_routed_tree_t, opal_list_item_t, + construct, destruct); + + +static int binomial_tree(int rank, int parent, int me, int num_procs, + int *nchildren, opal_list_t *childrn, opal_bitmap_t *relatives) { int i, bitmap, peer, hibit, mask, found; - orte_namelist_t *child; + orte_routed_tree_t *child; + opal_bitmap_t *relations; /* is this me? 
*/ if (me == rank) { @@ -944,22 +967,30 @@ static int binomial_tree(int rank, int parent, int me, int num_procs) for (i = hibit + 1, mask = 1 << i; i <= bitmap; ++i, mask <<= 1) { peer = rank | mask; if (peer < num_procs) { - child = OBJ_NEW(orte_namelist_t); - child->name.jobid = ORTE_PROC_MY_NAME->jobid; - child->name.vpid = peer; + child = OBJ_NEW(orte_routed_tree_t); + child->vpid = peer; OPAL_OUTPUT_VERBOSE((3, orte_routed_base_output, "%s routed:binomial found child %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&child->name))); - - opal_list_append(&my_children, &child->item); - num_children++; + ORTE_VPID_PRINT(child->vpid))); + if (NULL != childrn) { + /* this is a direct child - add it to my list */ + opal_list_append(childrn, &child->super); + (*nchildren)++; + /* setup the relatives bitmap */ + opal_bitmap_init(&child->relatives, num_procs); + /* point to the relatives */ + relations = &child->relatives; + } else { + /* we are recording someone's relatives - set the bit */ + opal_bitmap_set_bit(relatives, peer); + OBJ_RELEASE(child); /* node is not kept on any list in this mode - release it so it does not leak */ + relations = relatives; + } + /* search for this child's relatives */ + binomial_tree(0, 0, peer, num_procs, NULL, NULL, relations); } } - OPAL_OUTPUT_VERBOSE((3, orte_routed_base_output, - "%s routed:binomial found parent %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - parent)); return parent; } @@ -973,7 +1004,7 @@ static int binomial_tree(int rank, int parent, int me, int num_procs) peer = rank | mask; if (peer < num_procs) { /* execute compute on this child */ - if (0 <= (found = binomial_tree(peer, rank, me, num_procs))) { + if (0 <= (found = binomial_tree(peer, rank, me, num_procs, nchildren, childrn, relatives))) { return found; } } @@ -998,18 +1029,41 @@ static int update_routing_tree(void) } num_children = 0; - /* recompute the tree */ + /* compute my direct children and the bitmap that shows which vpids + * lie underneath their branch + */ my_parent.vpid = binomial_tree(0, 0,
ORTE_PROC_MY_NAME->vpid, - orte_process_info.num_procs); + orte_process_info.num_procs, + &num_children, &my_children, NULL); +#if 0 + { + orte_routed_tree_t *child; + int j; + + opal_output(0, "%s: parent %d num_children %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), my_parent.vpid, num_children); + for (item = opal_list_get_first(&my_children); + item != opal_list_get_end(&my_children); + item = opal_list_get_next(item)) { + child = (orte_routed_tree_t*)item; + opal_output(0, "%s: \tchild %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), child->vpid); + for (j=0; j < (int)orte_process_info.num_procs; j++) { + if (opal_bitmap_is_set_bit(&child->relatives, j)) { + opal_output(0, "%s: \t\trelation %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), j); + } + } + } + } +#endif + return ORTE_SUCCESS; } -static orte_vpid_t get_routing_tree(orte_jobid_t job, - opal_list_t *children) +static orte_vpid_t get_routing_tree(orte_jobid_t job, opal_list_t *children) { opal_list_item_t *item; - orte_namelist_t *nm, *child; + orte_namelist_t *nm; + orte_routed_tree_t *child; /* if I am anything other than a daemon or the HNP, this * is a meaningless command as I am not allowed to route @@ -1025,10 +1079,10 @@ static orte_vpid_t get_routing_tree(orte_jobid_t job, for (item = opal_list_get_first(&my_children); item != opal_list_get_end(&my_children); item = opal_list_get_next(item)) { - child = (orte_namelist_t*)item; + child = (orte_routed_tree_t*)item; nm = OBJ_NEW(orte_namelist_t); - nm->name.jobid = child->name.jobid; - nm->name.vpid = child->name.vpid; + nm->name.jobid = ORTE_PROC_MY_NAME->jobid; + nm->name.vpid = child->vpid; opal_list_append(children, &nm->item); } } @@ -1036,7 +1090,7 @@ static orte_vpid_t get_routing_tree(orte_jobid_t job, return my_parent.vpid; } -static int get_wireup_info(orte_jobid_t job, opal_buffer_t *buf) +static int get_wireup_info(opal_buffer_t *buf) { int rc; @@ -1055,7 +1109,7 @@ static int get_wireup_info(orte_jobid_t job, opal_buffer_t *buf) return ORTE_SUCCESS; } - 
if (ORTE_SUCCESS != (rc = orte_rml_base_get_contact_info(job, buf))) { + if (ORTE_SUCCESS != (rc = orte_rml_base_get_contact_info(ORTE_PROC_MY_NAME->jobid, buf))) { ORTE_ERROR_LOG(rc); OBJ_RELEASE(buf); return rc; @@ -1064,6 +1118,43 @@ static int get_wireup_info(orte_jobid_t job, opal_buffer_t *buf) return ORTE_SUCCESS; } +static bool proc_is_below(orte_vpid_t root, orte_vpid_t target) +{ + opal_list_item_t *item; + orte_routed_tree_t *child; + + /* if I am anything other than a daemon or the HNP, this + * is a meaningless command as I am not allowed to route + */ + if (!orte_process_info.daemon && !orte_process_info.hnp) { + return false; + } + + /* quick check: if root == target, then the answer is always true! */ + if (root == target) { + return true; + } + + /* check the list of children to see if either their vpid + * matches target, or the target bit is set in their bitmap + */ + + /* first find the specified child */ + for (item = opal_list_get_first(&my_children); + item != opal_list_get_end(&my_children); + item = opal_list_get_next(item)) { + child = (orte_routed_tree_t*)item; + if (child->vpid == root) { + /* now see if the target lies below this child; compare with 1 because is_set_bit returns an OPAL error code (not 0/1) for an out-of-range bit, which must not convert to true */ + return (1 == opal_bitmap_is_set_bit(&child->relatives, target)); + } + } + + /* only get here if we have no children or we didn't find anything */ + return false; +} + + #if OPAL_ENABLE_FT == 1 static int binomial_ft_event(int state) { diff --git a/orte/mca/routed/direct/routed_direct.c b/orte/mca/routed/direct/routed_direct.c index 5020cb00f9..cbea363f20 100644 --- a/orte/mca/routed/direct/routed_direct.c +++ b/orte/mca/routed/direct/routed_direct.c @@ -54,7 +54,8 @@ static int route_lost(const orte_process_name_t *route); static bool route_is_defined(const orte_process_name_t *target); static int update_routing_tree(void); static orte_vpid_t get_routing_tree(orte_jobid_t job, opal_list_t *children); -static int get_wireup_info(orte_jobid_t job, opal_buffer_t *buf); +static bool proc_is_below(orte_vpid_t
root, orte_vpid_t target); +static int get_wireup_info(opal_buffer_t *buf); static int warmup_routes(void); #if OPAL_ENABLE_FT == 1 @@ -73,6 +74,7 @@ orte_routed_module_t orte_routed_direct_module = { route_is_defined, update_routing_tree, get_routing_tree, + proc_is_below, get_wireup_info, #if OPAL_ENABLE_FT == 1 direct_ft_event @@ -865,8 +867,7 @@ static int update_routing_tree(void) return ORTE_SUCCESS; } -static orte_vpid_t get_routing_tree(orte_jobid_t job, - opal_list_t *children) +static orte_vpid_t get_routing_tree(orte_jobid_t job, opal_list_t *children) { orte_namelist_t *nm; orte_job_t *jdata; @@ -896,7 +897,7 @@ static orte_vpid_t get_routing_tree(orte_jobid_t job, ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); return ORTE_VPID_INVALID; } - /* if this is the daemon job, don't include myself */ + /* if this is to the daemons, don't include myself */ if (ORTE_PROC_MY_NAME->jobid == job) { start = 1; } else { @@ -914,7 +915,25 @@ static orte_vpid_t get_routing_tree(orte_jobid_t job, return ORTE_VPID_INVALID; } -static int get_wireup_info(orte_jobid_t jobid, opal_buffer_t *buf) +static bool proc_is_below(orte_vpid_t root, orte_vpid_t target) +{ + /* this is a flat routing tree - if I am not the HNP, then + * nobody is below + */ + if (!orte_process_info.hnp) { + return false; + } + /* if I am the HNP, then the route is through the root + * if the root is the target + */ + if (root == target) { + return true; + } + /* otherwise, not */ + return false; +} + +static int get_wireup_info(opal_buffer_t *buf) { int rc; @@ -926,7 +945,7 @@ static int get_wireup_info(orte_jobid_t jobid, opal_buffer_t *buf) return ORTE_ERR_NOT_SUPPORTED; } - if (ORTE_SUCCESS != (rc = orte_rml_base_get_contact_info(jobid, buf))) { + if (ORTE_SUCCESS != (rc = orte_rml_base_get_contact_info(ORTE_PROC_MY_NAME->jobid, buf))) { ORTE_ERROR_LOG(rc); OBJ_RELEASE(buf); return rc; diff --git a/orte/mca/routed/linear/routed_linear.c b/orte/mca/routed/linear/routed_linear.c index 76a465c622..dca132e4b3 
100644 --- a/orte/mca/routed/linear/routed_linear.c +++ b/orte/mca/routed/linear/routed_linear.c @@ -42,7 +42,8 @@ static int route_lost(const orte_process_name_t *route); static bool route_is_defined(const orte_process_name_t *target); static int update_routing_tree(void); static orte_vpid_t get_routing_tree(orte_jobid_t job, opal_list_t *children); -static int get_wireup_info(orte_jobid_t job, opal_buffer_t *buf); +static bool proc_is_below(orte_vpid_t root, orte_vpid_t target); +static int get_wireup_info(opal_buffer_t *buf); static int warmup_routes(void); #if OPAL_ENABLE_FT == 1 @@ -61,6 +62,7 @@ orte_routed_module_t orte_routed_linear_module = { route_is_defined, update_routing_tree, get_routing_tree, + proc_is_below, get_wireup_info, #if OPAL_ENABLE_FT == 1 linear_ft_event @@ -926,8 +928,7 @@ static int update_routing_tree(void) return ORTE_SUCCESS; } -static orte_vpid_t get_routing_tree(orte_jobid_t job, - opal_list_t *children) +static orte_vpid_t get_routing_tree(orte_jobid_t job, opal_list_t *children) { orte_namelist_t *nm; @@ -960,7 +961,22 @@ static orte_vpid_t get_routing_tree(orte_jobid_t job, } -static int get_wireup_info(orte_jobid_t job, opal_buffer_t *buf) +static bool proc_is_below(orte_vpid_t root, orte_vpid_t target) +{ + /* if the target is less than the root, then the path + * cannot lie through the root + */ + + if (target < root) { + return false; + } + + /* otherwise, it does! 
*/ + return true; +} + + +static int get_wireup_info(opal_buffer_t *buf) { int rc; @@ -979,7 +995,7 @@ static int get_wireup_info(orte_jobid_t job, opal_buffer_t *buf) return ORTE_SUCCESS; } - if (ORTE_SUCCESS != (rc = orte_rml_base_get_contact_info(job, buf))) { + if (ORTE_SUCCESS != (rc = orte_rml_base_get_contact_info(ORTE_PROC_MY_NAME->jobid, buf))) { ORTE_ERROR_LOG(rc); OBJ_RELEASE(buf); return rc; diff --git a/orte/mca/routed/routed.h b/orte/mca/routed/routed.h index 4df980e111..9a2d42dffc 100644 --- a/orte/mca/routed/routed.h +++ b/orte/mca/routed/routed.h @@ -174,14 +174,12 @@ typedef int (*orte_routed_module_route_lost_fn_t)(const orte_process_name_t *rou typedef bool (*orte_routed_module_route_is_defined_fn_t)(const orte_process_name_t *target); /** - * Get wireup data for the specified job + * Get wireup data for daemons * - * Given a jobid and a pointer to a buffer, add whatever routing data - * this module requires to allow inter-process messaging for the - * job. + * Add whatever routing data + * this module requires to allow inter-process messaging. Only callable by HNP. */ -typedef int (*orte_routed_module_get_wireup_info_fn_t)(orte_jobid_t job, - opal_buffer_t *buf); +typedef int (*orte_routed_module_get_wireup_info_fn_t)(opal_buffer_t *buf); /* * Update the module's routing tree for this process @@ -199,10 +197,31 @@ typedef int (*orte_routed_module_update_routing_tree_fn_t)(void); * Get the routing tree for this process * * Fills the provided list with the direct children of this process - * in the routing tree, and returns the vpid of the parent + * in the routing tree, and returns the vpid of the parent. Only valid + * when called by a daemon or the HNP. 
*/ -typedef orte_vpid_t (*orte_routed_module_get_routing_tree_fn_t)(orte_jobid_t job, - opal_list_t *children); +typedef orte_vpid_t (*orte_routed_module_get_routing_tree_fn_t)(orte_jobid_t job, opal_list_t *children); + +/* + * Is the specified process below the given root in the routing tree graph? + * + * Checks the routing tree to see if the specified process lies below the root + * in the graph. This is required to support the daemon collective process. + * It differs from get_route in that it is not concerned with identifying + * the next hop to take in communication routing, thus allowing the two + * (routing vs collective) to differ. + * + * RHC: eventually, we may want to merge the two functions. However, it is + * also possible we may want to maintain separation so that we can have + * daemon collectives that follow an initial wiring pattern, but also allow + * for dynamically defined comm patterns. + * + * @retval TRUE Path flows through the root. The path + * may require multiple steps before reaching the specified process. + * @retval FALSE Path does not lie below. + * + */ +typedef bool (*orte_routed_module_proc_is_below_fn_t)(orte_vpid_t root, orte_vpid_t target); /* * Warmup routes @@ -248,6 +267,7 @@ struct orte_routed_module_t { /* fns for daemons */ orte_routed_module_update_routing_tree_fn_t update_routing_tree; orte_routed_module_get_routing_tree_fn_t get_routing_tree; + orte_routed_module_proc_is_below_fn_t proc_is_below; orte_routed_module_get_wireup_info_fn_t get_wireup_info; /* FT Notification */ orte_routed_module_ft_event_fn_t ft_event; diff --git a/orte/runtime/data_type_support/orte_dt_print_fns.c b/orte/runtime/data_type_support/orte_dt_print_fns.c index bd64f96447..4486ed6f3a 100644 --- a/orte/runtime/data_type_support/orte_dt_print_fns.c +++ b/orte/runtime/data_type_support/orte_dt_print_fns.c @@ -590,7 +590,6 @@ int orte_dt_print_map(char **output, char *prefix, orte_job_map_t *src, opal_dat (src->pernode) ? 
"TRUE" : "FALSE", (long)src->npernode, (src->oversubscribe) ? "TRUE" : "FALSE", (src->cpu_lists) ? "TRUE" : "FALSE"); - free(pfx2); if (ORTE_VPID_INVALID == src->daemon_vpid_start) { asprintf(&tmp2, "%s\n%sNum new daemons: %ld\tNew daemon starting vpid INVALID\n%sNum nodes: %ld", @@ -616,14 +615,10 @@ int orte_dt_print_map(char **output, char *prefix, orte_job_map_t *src, opal_dat free(tmp); return rc; } - if (NULL == tmp) { - tmp = tmp2; - } else { - asprintf(&tmp3, "%s\n%s", tmp, tmp2); - free(tmp); - free(tmp2); - tmp = tmp3; - } + asprintf(&tmp3, "%s\n%s", tmp, tmp2); + free(tmp); + free(tmp2); + tmp = tmp3; } } @@ -633,6 +628,7 @@ int orte_dt_print_map(char **output, char *prefix, orte_job_map_t *src, opal_dat free(tmp); tmp = tmp2; } + free(pfx2); /* set the return */ *output = tmp; diff --git a/orte/test/system/binom.c b/orte/test/system/binom.c index 63b3e81a54..a99401daea 100644 --- a/orte/test/system/binom.c +++ b/orte/test/system/binom.c @@ -14,61 +14,37 @@ #include "opal/util/bit_ops.h" #include "opal/class/opal_list.h" +#include "opal/class/opal_bitmap.h" #include "orte/util/name_fns.h" #include "orte/runtime/orte_globals.h" #include "orte/runtime/runtime.h" -int down_search(int rank, int parent, int me, int num_procs, - int *num_children, opal_list_t *children) -{ - int i, bitmap, peer, hibit, mask, found; - orte_namelist_t *child; - - /* is this me? 
*/ - if (me == rank) { - bitmap = opal_cube_dim(num_procs); - - hibit = opal_hibit(rank, bitmap); - --bitmap; - - for (i = hibit + 1, mask = 1 << i; i <= bitmap; ++i, mask <<= 1) { - peer = rank | mask; - if (peer < num_procs) { - if (NULL != children) { - child = OBJ_NEW(orte_namelist_t); - child->name.jobid = ORTE_PROC_MY_NAME->jobid; - child->name.vpid = peer; - opal_list_append(children, &child->item); - } - (*num_children)++; - } - } - return parent; - } - - /* find the children of this rank */ - bitmap = opal_cube_dim(num_procs); - - hibit = opal_hibit(rank, bitmap); - --bitmap; - - for (i = hibit + 1, mask = 1 << i; i <= bitmap; ++i, mask <<= 1) { - peer = rank | mask; - if (peer < num_procs) { - /* execute compute on this child */ - if (0 <= (found = down_search(peer, rank, me, num_procs, num_children, children))) { - return found; - } - } - } - return -1; -} +typedef struct { + opal_list_item_t super; + orte_vpid_t vpid; + opal_bitmap_t relatives; +} orte_routed_tree_t; -#if 0 -int down_search(int rank, int parent, int me) +static void construct(orte_routed_tree_t *rt) +{ + rt->vpid = ORTE_VPID_INVALID; + OBJ_CONSTRUCT(&rt->relatives, opal_bitmap_t); +} +static void destruct(orte_routed_tree_t *rt) +{ + OBJ_DESTRUCT(&rt->relatives); +} +OBJ_CLASS_INSTANCE(orte_routed_tree_t, opal_list_item_t, + construct, destruct); + + +int down_search(int rank, int parent, int me, int num_procs, + int *num_children, opal_list_t *children, opal_bitmap_t *relatives) { int i, bitmap, peer, hibit, mask, found; + orte_routed_tree_t *child; + opal_bitmap_t *relations; /* is this me?
*/ if (me == rank) { @@ -77,12 +53,27 @@ int down_search(int rank, int parent, int me) hibit = opal_hibit(rank, bitmap); --bitmap; - printf("\tfound parent %d\n", parent); for (i = hibit + 1, mask = 1 << i; i <= bitmap; ++i, mask <<= 1) { peer = rank | mask; if (peer < num_procs) { - printf("\tchild: %d\n", peer); - num_children++; + child = OBJ_NEW(orte_routed_tree_t); + child->vpid = peer; + if (NULL != children) { + /* this is a direct child - add it to my list */ + opal_list_append(children, &child->super); + (*num_children)++; + /* setup the relatives bitmap */ + opal_bitmap_init(&child->relatives, num_procs); + /* point to the relatives */ + relations = &child->relatives; + } else { + /* we are recording someone's relatives - set the bit */ + opal_bitmap_set_bit(relatives, peer); + OBJ_RELEASE(child); /* node is not kept on any list in this mode - release it so it does not leak */ + relations = relatives; + } + /* search for this child's relatives */ + down_search(0, 0, peer, num_procs, NULL, NULL, relations); } } return parent; @@ -98,38 +89,48 @@ int down_search(int rank, int parent, int me) peer = rank | mask; if (peer < num_procs) { /* execute compute on this child */ - if (0 <= (found = down_search(peer, rank, me))) { + if (0 <= (found = down_search(peer, rank, me, num_procs, num_children, children, relatives))) { return found; } } } return -1; } -#endif int main(int argc, char* argv[]) { - int i; + int i, j; int found; opal_list_t children; opal_list_item_t *item; int num_children; int num_procs; - orte_namelist_t *child; + orte_routed_tree_t *child; + opal_bitmap_t *relations; + + if (2 != argc) { + printf("usage: binom x, where x=number of procs\n"); + exit(1); + } orte_init(ORTE_TOOL); - num_procs = 32; + num_procs = atoi(argv[1]); for (i=0; i < num_procs; i++) { OBJ_CONSTRUCT(&children, opal_list_t); num_children = 0; printf("i am %d:", i); - found = down_search(0, 0, i, num_procs, &num_children, &children, NULL); printf("\tparent %d num_children
%d\n", found, num_children); while (NULL != (item = opal_list_remove_first(&children))) { - child = (orte_namelist_t*)item; - printf("\t\tchild %d\n", child->name.vpid); + child = (orte_routed_tree_t*)item; + printf("\tchild %d\n", child->vpid); + for (j=0; j < num_procs; j++) { + if (opal_bitmap_is_set_bit(&child->relatives, j)) { + printf("\t\trelation %d\n", j); + } + } OBJ_RELEASE(item); } OBJ_DESTRUCT(&children);