From 125d2361731ad905ddd0b1363b749dd95d974ffa Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Tue, 29 Jan 2019 16:02:21 -0800 Subject: [PATCH 1/8] Move from the use of regex to compression We've been fighting the battle of trying to create a regex generator and parser that can handle arbitrary hostname schemes - without long-term success. The worst of it is that there is no way of checking to see if the computed regex is correct short of parsing it and doing a character-by-character comparison with the original string. Ugh...there has to be a better solution. One option is to investigate using 3rd-party regex libraries as those are coming from communities whose sole focus is resolving that problem. However, someone would need to spend the time to investigate it, and we'd have to find a license-friendly implementation. Another option is to quit beating our heads against the wall and just compress the information. It won't be as much of a reduction, but we also won't keep hitting scenarios where things break. In this case, it seems that "perfection" is definitely the enemy of "good enough". This PR implements the compression option while retaining the possibility of people adding regex-generating components. The compression code used in ORTE is consolidated into the opal/compress framework. That framework currently held bzip and gzip components for use in compressing checkpoint files - since we no longer support C/R, I have .opal_ignore'd those components. However, I have left the original framework APIs alone in case someone ever decides to redo C/R. The APIs of interest here are added to the framework - specifically, the "compress_block" and "decompress_block" functions. I then moved the ORTE zlib compression code into a new component in this framework. Unfortunately, the framework currently is a single-select one - i.e., only one active component at a time. Since I .opal_ignore'd the other two and made the priority of zlib high, this isn't a problem. 
However, if someone wants to re-enable bzip/gzip or add another component, they might need to transition opal/compress to a multi-select framework. Included changes: * Consolidate the compression code into the opal/compress framework * Move the ORTE zlib compression code into a new opal/compress/zlib component * Ignore the bzip and gzip components in opal/compress framework * Add a "compress_base_limit" MCA param to set the threshold above which we compress data - defaults to 4096 bytes * Delete stale brucks and rcd components from orte/grpcomm framework * Delete the orte/regx framework * Update the launch system to use opal/compress instead of string regex * Provide a default module if no zlib is available * Fix some misc multi-node issues * Properly generate the nidmap in response to a "connection warmup" message so the remote daemon knows the children it needs to launch. * Remove stale references to orte_node_regex * opal_byte_object_t's are not OPAL objects - properly release allocated memory. * Set the topology * Currently only handling homogeneous case * Update the compress framework files to conform * Consolidate open/close into one "frame" file. 
Ensure we open/close the framework Signed-off-by: Ralph Castain --- ompi/op/op.h | 2 + opal/mca/compress/base/Makefile.am | 4 +- opal/mca/compress/base/base.h | 7 + opal/mca/compress/base/compress_base_close.c | 36 - ...ress_base_open.c => compress_base_frame.c} | 44 +- opal/mca/compress/base/compress_base_select.c | 17 +- .../mca/compress/bzip}/.opal_ignore | 0 .../compress/bzip/compress_bzip_component.c | 30 +- opal/mca/compress/compress.h | 19 + .../mca/compress/gzip}/.opal_ignore | 0 .../compress/gzip/compress_gzip_component.c | 30 +- opal/mca/compress/zlib/Makefile.am | 42 + opal/mca/compress/zlib/compress_zlib.c | 133 ++ opal/mca/compress/zlib/compress_zlib.h | 66 + .../compress/zlib/compress_zlib_component.c | 149 ++ opal/mca/compress/zlib/configure.m4 | 102 ++ .../mca/compress/zlib}/owner.txt | 4 +- opal/runtime/opal_init.c | 14 +- .../default_orted/errmgr_default_orted.c | 52 +- orte/mca/ess/base/ess_base_std_orted.c | 65 +- orte/mca/ess/hnp/ess_hnp_module.c | 13 +- orte/mca/grpcomm/base/grpcomm_base_stubs.c | 8 +- orte/mca/grpcomm/brucks/Makefile.am | 41 - orte/mca/grpcomm/brucks/grpcomm_brucks.h | 31 - .../grpcomm/brucks/grpcomm_brucks_component.c | 84 -- .../grpcomm/brucks/grpcomm_brucks_module.c | 388 ----- orte/mca/grpcomm/direct/grpcomm_direct.c | 39 +- orte/mca/grpcomm/rcd/Makefile.am | 41 - orte/mca/grpcomm/rcd/grpcomm_rcd.c | 329 ----- orte/mca/grpcomm/rcd/grpcomm_rcd.h | 31 - orte/mca/grpcomm/rcd/grpcomm_rcd_component.c | 84 -- orte/mca/grpcomm/rcd/owner.txt | 7 - orte/mca/odls/base/odls_base_default_fns.c | 230 ++- orte/mca/oob/tcp/oob_tcp_component.c | 7 +- orte/mca/plm/base/plm_base_launch_support.c | 54 +- orte/mca/regx/Makefile.am | 30 - orte/mca/regx/base/Makefile.am | 18 - orte/mca/regx/base/base.h | 74 - orte/mca/regx/base/owner.txt | 7 - orte/mca/regx/base/regx_base_default_fns.c | 1282 ----------------- orte/mca/regx/base/regx_base_frame.c | 77 - orte/mca/regx/base/regx_base_select.c | 61 - orte/mca/regx/fwd/Makefile.am | 36 - 
orte/mca/regx/fwd/owner.txt | 7 - orte/mca/regx/fwd/regx_fwd.c | 300 ---- orte/mca/regx/fwd/regx_fwd.h | 28 - orte/mca/regx/fwd/regx_fwd_component.c | 44 - orte/mca/regx/regx.h | 127 -- orte/mca/regx/reverse/Makefile.am | 36 - orte/mca/regx/reverse/owner.txt | 7 - orte/mca/regx/reverse/regx_reverse.c | 319 ---- orte/mca/regx/reverse/regx_reverse.h | 28 - .../mca/regx/reverse/regx_reverse_component.c | 44 - orte/mca/rml/base/rml_base_msg_handlers.c | 6 +- orte/orted/orted_comm.c | 6 +- orte/orted/orted_main.c | 41 +- orte/runtime/orte_globals.c | 5 +- orte/runtime/orte_globals.h | 5 +- orte/runtime/orte_mca_params.c | 17 +- orte/util/Makefile.am | 8 +- orte/util/compress.c | 117 -- orte/util/compress.h | 53 - orte/util/nidmap.c | 793 ++++++++++ orte/util/nidmap.h | 43 + 64 files changed, 1609 insertions(+), 4213 deletions(-) delete mode 100644 opal/mca/compress/base/compress_base_close.c rename opal/mca/compress/base/{compress_base_open.c => compress_base_frame.c} (58%) rename {orte/mca/grpcomm/brucks => opal/mca/compress/bzip}/.opal_ignore (100%) rename {orte/mca/grpcomm/rcd => opal/mca/compress/gzip}/.opal_ignore (100%) create mode 100644 opal/mca/compress/zlib/Makefile.am create mode 100644 opal/mca/compress/zlib/compress_zlib.c create mode 100644 opal/mca/compress/zlib/compress_zlib.h create mode 100644 opal/mca/compress/zlib/compress_zlib_component.c create mode 100644 opal/mca/compress/zlib/configure.m4 rename {orte/mca/grpcomm/brucks => opal/mca/compress/zlib}/owner.txt (79%) delete mode 100644 orte/mca/grpcomm/brucks/Makefile.am delete mode 100644 orte/mca/grpcomm/brucks/grpcomm_brucks.h delete mode 100644 orte/mca/grpcomm/brucks/grpcomm_brucks_component.c delete mode 100644 orte/mca/grpcomm/brucks/grpcomm_brucks_module.c delete mode 100644 orte/mca/grpcomm/rcd/Makefile.am delete mode 100644 orte/mca/grpcomm/rcd/grpcomm_rcd.c delete mode 100644 orte/mca/grpcomm/rcd/grpcomm_rcd.h delete mode 100644 orte/mca/grpcomm/rcd/grpcomm_rcd_component.c delete mode 
100644 orte/mca/grpcomm/rcd/owner.txt delete mode 100644 orte/mca/regx/Makefile.am delete mode 100644 orte/mca/regx/base/Makefile.am delete mode 100644 orte/mca/regx/base/base.h delete mode 100644 orte/mca/regx/base/owner.txt delete mode 100644 orte/mca/regx/base/regx_base_default_fns.c delete mode 100644 orte/mca/regx/base/regx_base_frame.c delete mode 100644 orte/mca/regx/base/regx_base_select.c delete mode 100644 orte/mca/regx/fwd/Makefile.am delete mode 100644 orte/mca/regx/fwd/owner.txt delete mode 100644 orte/mca/regx/fwd/regx_fwd.c delete mode 100644 orte/mca/regx/fwd/regx_fwd.h delete mode 100644 orte/mca/regx/fwd/regx_fwd_component.c delete mode 100644 orte/mca/regx/regx.h delete mode 100644 orte/mca/regx/reverse/Makefile.am delete mode 100644 orte/mca/regx/reverse/owner.txt delete mode 100644 orte/mca/regx/reverse/regx_reverse.c delete mode 100644 orte/mca/regx/reverse/regx_reverse.h delete mode 100644 orte/mca/regx/reverse/regx_reverse_component.c delete mode 100644 orte/util/compress.c delete mode 100644 orte/util/compress.h create mode 100644 orte/util/nidmap.c create mode 100644 orte/util/nidmap.h diff --git a/ompi/op/op.h b/ompi/op/op.h index 368dd923af..742b7461f1 100644 --- a/ompi/op/op.h +++ b/ompi/op/op.h @@ -18,6 +18,8 @@ * Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All Rights reserved. * Copyright (c) 2019 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2018 Triad National Security, LLC. All rights + * reserved. * $COPYRIGHT$ * * Additional copyrights may follow diff --git a/opal/mca/compress/base/Makefile.am b/opal/mca/compress/base/Makefile.am index 385d0b3fed..47c168bd05 100644 --- a/opal/mca/compress/base/Makefile.am +++ b/opal/mca/compress/base/Makefile.am @@ -3,6 +3,7 @@ # University Research and Technology # Corporation. All rights reserved. # Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2019 Intel, Inc. All rights reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow @@ -14,7 +15,6 @@ headers += \ base/base.h libmca_compress_la_SOURCES += \ - base/compress_base_open.c \ - base/compress_base_close.c \ + base/compress_base_frame.c \ base/compress_base_select.c \ base/compress_base_fns.c diff --git a/opal/mca/compress/base/base.h b/opal/mca/compress/base/base.h index df84fe083a..02dedb3ed5 100644 --- a/opal/mca/compress/base/base.h +++ b/opal/mca/compress/base/base.h @@ -3,6 +3,7 @@ * University Research and Technology * Corporation. All rights reserved. * + * Copyright (c) 2019 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -27,6 +28,12 @@ extern "C" { #endif +typedef struct { + size_t compress_limit; +} opal_compress_base_t; + +OPAL_DECLSPEC extern opal_compress_base_t opal_compress_base; + /** * Initialize the COMPRESS MCA framework * diff --git a/opal/mca/compress/base/compress_base_close.c b/opal/mca/compress/base/compress_base_close.c deleted file mode 100644 index e88c80c397..0000000000 --- a/opal/mca/compress/base/compress_base_close.c +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "opal_config.h" - -#include -#include "opal/mca/mca.h" -#include "opal/mca/base/base.h" -#include "opal/include/opal/constants.h" -#include "opal/mca/compress/compress.h" -#include "opal/mca/compress/base/base.h" - -int opal_compress_base_close(void) -{ - /* Compression currently only used with C/R */ - if( !opal_cr_is_enabled ) { - opal_output_verbose(10, opal_compress_base_framework.framework_output, - "compress:open: FT is not enabled, skipping!"); - return OPAL_SUCCESS; - } - - /* Call the component's finalize routine */ - if( NULL != opal_compress.finalize ) { - opal_compress.finalize(); - } - - /* Close all available modules that are open */ - return mca_base_framework_components_close (&opal_compress_base_framework, NULL); -} diff --git a/opal/mca/compress/base/compress_base_open.c b/opal/mca/compress/base/compress_base_frame.c similarity index 58% rename from opal/mca/compress/base/compress_base_open.c rename to opal/mca/compress/base/compress_base_frame.c index dfa4900409..c46a43bcc9 100644 --- a/opal/mca/compress/base/compress_base_open.c +++ b/opal/mca/compress/base/compress_base_frame.c @@ -6,6 +6,7 @@ * All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2019 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -23,14 +24,31 @@ /* * Globals */ +static bool compress_block(uint8_t *inbytes, + size_t inlen, + uint8_t **outbytes, + size_t *olen) +{ + return false; +} + +static bool decompress_block(uint8_t **outbytes, size_t olen, + uint8_t *inbytes, size_t len) +{ + return false; +} + opal_compress_base_module_t opal_compress = { NULL, /* init */ NULL, /* finalize */ NULL, /* compress */ NULL, /* compress_nb */ NULL, /* decompress */ - NULL /* decompress_nb */ + NULL, /* decompress_nb */ + compress_block, + decompress_block }; +opal_compress_base_t opal_compress_base = {0}; opal_compress_base_component_t opal_compress_base_selected_component = {{0}}; @@ -42,6 +60,12 @@ MCA_BASE_FRAMEWORK_DECLARE(opal, compress, "COMPRESS MCA", static int opal_compress_base_register(mca_base_register_flag_t flags) { + opal_compress_base.compress_limit = 4096; + (void) mca_base_var_register("opal", "compress", "base", "limit", + "Threshold beyond which data will be compressed", + MCA_BASE_VAR_TYPE_SIZE_T, NULL, 0, 0, OPAL_INFO_LVL_3, + MCA_BASE_VAR_SCOPE_READONLY, &opal_compress_base.compress_limit); + return OPAL_SUCCESS; } @@ -51,13 +75,17 @@ static int opal_compress_base_register(mca_base_register_flag_t flags) */ int opal_compress_base_open(mca_base_open_flag_t flags) { - /* Compression currently only used with C/R */ - if(!opal_cr_is_enabled) { - opal_output_verbose(10, opal_compress_base_framework.framework_output, - "compress:open: FT is not enabled, skipping!"); - return OPAL_SUCCESS; - } - /* Open up all available components */ return mca_base_framework_components_open(&opal_compress_base_framework, flags); } + +int opal_compress_base_close(void) +{ + /* Call the component's finalize routine */ + if( NULL != opal_compress.finalize ) { + opal_compress.finalize(); + } + + /* Close all available modules that are open */ + return mca_base_framework_components_close (&opal_compress_base_framework, NULL); +} diff --git 
a/opal/mca/compress/base/compress_base_select.c b/opal/mca/compress/base/compress_base_select.c index 6e98f33a27..b9fdadbe62 100644 --- a/opal/mca/compress/base/compress_base_select.c +++ b/opal/mca/compress/base/compress_base_select.c @@ -7,6 +7,7 @@ * * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2019 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -29,17 +30,10 @@ int opal_compress_base_select(void) { - int ret, exit_status = OPAL_SUCCESS; + int ret = OPAL_SUCCESS; opal_compress_base_component_t *best_component = NULL; opal_compress_base_module_t *best_module = NULL; - /* Compression currently only used with C/R */ - if( !opal_cr_is_enabled ) { - opal_output_verbose(10, opal_compress_base_framework.framework_output, - "compress:open: FT is not enabled, skipping!"); - return OPAL_SUCCESS; - } - /* * Select the best component */ @@ -47,8 +41,8 @@ int opal_compress_base_select(void) &opal_compress_base_framework.framework_components, (mca_base_module_t **) &best_module, (mca_base_component_t **) &best_component, NULL) ) { - /* This will only happen if no component was selected */ - exit_status = OPAL_ERROR; + /* This will only happen if no component was selected, + * in which case we use the default one */ goto cleanup; } @@ -58,12 +52,11 @@ int opal_compress_base_select(void) /* Initialize the winner */ if (NULL != best_module) { if (OPAL_SUCCESS != (ret = best_module->init()) ) { - exit_status = ret; goto cleanup; } opal_compress = *best_module; } cleanup: - return exit_status; + return ret; } diff --git a/orte/mca/grpcomm/brucks/.opal_ignore b/opal/mca/compress/bzip/.opal_ignore similarity index 100% rename from orte/mca/grpcomm/brucks/.opal_ignore rename to opal/mca/compress/bzip/.opal_ignore diff --git a/opal/mca/compress/bzip/compress_bzip_component.c b/opal/mca/compress/bzip/compress_bzip_component.c index 2d0d1493c2..f03022b418 100644 --- 
a/opal/mca/compress/bzip/compress_bzip_component.c +++ b/opal/mca/compress/bzip/compress_bzip_component.c @@ -4,6 +4,7 @@ * All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2019 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -65,22 +66,39 @@ opal_compress_bzip_component_t mca_compress_bzip_component = { } }; +static bool nocompress(uint8_t *inbytes, + size_t inlen, + uint8_t **outbytes, + size_t *olen) +{ + return false; +} + +static bool nodecompress(uint8_t **outbytes, size_t olen, + uint8_t *inbytes, size_t len) +{ + return false; +} + /* * Bzip module */ static opal_compress_base_module_t loc_module = { /** Initialization Function */ - opal_compress_bzip_module_init, + .init = opal_compress_bzip_module_init, /** Finalization Function */ - opal_compress_bzip_module_finalize, + .finalize = opal_compress_bzip_module_finalize, /** Compress Function */ - opal_compress_bzip_compress, - opal_compress_bzip_compress_nb, + .compress = opal_compress_bzip_compress, + .compress_nb = opal_compress_bzip_compress_nb, /** Decompress Function */ - opal_compress_bzip_decompress, - opal_compress_bzip_decompress_nb + .decompress = opal_compress_bzip_decompress, + .decompress_nb = opal_compress_bzip_decompress_nb, + + .compress_block = nocompress, + .decompress_block = nodecompress }; static int compress_bzip_register (void) diff --git a/opal/mca/compress/compress.h b/opal/mca/compress/compress.h index 8b5ba6a7d4..74295526d6 100644 --- a/opal/mca/compress/compress.h +++ b/opal/mca/compress/compress.h @@ -6,6 +6,7 @@ * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. * + * Copyright (c) 2019 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -82,6 +83,20 @@ typedef int (*opal_compress_base_module_decompress_fn_t) typedef int (*opal_compress_base_module_decompress_nb_fn_t) (char * cname, char **fname, pid_t *child_pid); +/** + * Compress a string + * + * Arguments: + * + */ +typedef bool (*opal_compress_base_module_compress_string_fn_t)(uint8_t *inbytes, + size_t inlen, + uint8_t **outbytes, + size_t *olen); +typedef bool (*opal_compress_base_module_decompress_string_fn_t)(uint8_t **outbytes, size_t olen, + uint8_t *inbytes, size_t len); + + /** * Structure for COMPRESS components. */ @@ -117,6 +132,10 @@ struct opal_compress_base_module_1_0_0_t { /** Decompress Interface */ opal_compress_base_module_decompress_fn_t decompress; opal_compress_base_module_decompress_nb_fn_t decompress_nb; + + /* COMPRESS STRING */ + opal_compress_base_module_compress_string_fn_t compress_block; + opal_compress_base_module_decompress_string_fn_t decompress_block; }; typedef struct opal_compress_base_module_1_0_0_t opal_compress_base_module_1_0_0_t; typedef struct opal_compress_base_module_1_0_0_t opal_compress_base_module_t; diff --git a/orte/mca/grpcomm/rcd/.opal_ignore b/opal/mca/compress/gzip/.opal_ignore similarity index 100% rename from orte/mca/grpcomm/rcd/.opal_ignore rename to opal/mca/compress/gzip/.opal_ignore diff --git a/opal/mca/compress/gzip/compress_gzip_component.c b/opal/mca/compress/gzip/compress_gzip_component.c index 62be24d71b..6bbd10f791 100644 --- a/opal/mca/compress/gzip/compress_gzip_component.c +++ b/opal/mca/compress/gzip/compress_gzip_component.c @@ -4,6 +4,7 @@ * All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2019 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -65,22 +66,39 @@ opal_compress_gzip_component_t mca_compress_gzip_component = { } }; +static bool nocompress(uint8_t *inbytes, + size_t inlen, + uint8_t **outbytes, + size_t *olen) +{ + return false; +} + +static bool nodecompress(uint8_t **outbytes, size_t olen, + uint8_t *inbytes, size_t len) +{ + return false; +} + /* * Gzip module */ static opal_compress_base_module_t loc_module = { /** Initialization Function */ - opal_compress_gzip_module_init, + .init = opal_compress_gzip_module_init, /** Finalization Function */ - opal_compress_gzip_module_finalize, + .finalize = opal_compress_gzip_module_finalize, /** Compress Function */ - opal_compress_gzip_compress, - opal_compress_gzip_compress_nb, + .compress = opal_compress_gzip_compress, + .compress_nb = opal_compress_gzip_compress_nb, /** Decompress Function */ - opal_compress_gzip_decompress, - opal_compress_gzip_decompress_nb + .decompress = opal_compress_gzip_decompress, + .decompress_nb = opal_compress_gzip_decompress_nb, + + .compress_block = nocompress, + .decompress_block = nodecompress }; static int compress_gzip_register (void) diff --git a/opal/mca/compress/zlib/Makefile.am b/opal/mca/compress/zlib/Makefile.am new file mode 100644 index 0000000000..d9e2da948e --- /dev/null +++ b/opal/mca/compress/zlib/Makefile.am @@ -0,0 +1,42 @@ +# +# Copyright (c) 2004-2010 The Trustees of Indiana University. +# All rights reserved. +# Copyright (c) 2014-2015 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2017 IBM Corporation. All rights reserved. +# Copyright (c) 2019 Intel, Inc. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +AM_CPPFLAGS = $(compress_zlib_CPPFLAGS) + +sources = \ + compress_zlib.h \ + compress_zlib_component.c \ + compress_zlib.c + +# Make the output library in this directory, and name it either +# mca__.la (for DSO builds) or libmca__.la +# (for static builds). 
+ +if MCA_BUILD_opal_compress_zlib_DSO +component_noinst = +component_install = mca_compress_zlib.la +else +component_noinst = libmca_compress_zlib.la +component_install = +endif + +mcacomponentdir = $(opallibdir) +mcacomponent_LTLIBRARIES = $(component_install) +mca_compress_zlib_la_SOURCES = $(sources) +mca_compress_zlib_la_LDFLAGS = -module -avoid-version $(compress_zlib_LDFLAGS) +mca_compress_zlib_la_LIBADD = $(top_builddir)/opal/lib@OPAL_LIB_PREFIX@open-pal.la $(compress_zlib_LIBS) + +noinst_LTLIBRARIES = $(component_noinst) +libmca_compress_zlib_la_SOURCES = $(sources) +libmca_compress_zlib_la_LDFLAGS = -module -avoid-version $(compress_zlib_LDFLAGS) +libmca_compress_zlib_la_LIBADD = $(compress_zlib_LIBS) diff --git a/opal/mca/compress/zlib/compress_zlib.c b/opal/mca/compress/zlib/compress_zlib.c new file mode 100644 index 0000000000..850fa6aa2c --- /dev/null +++ b/opal/mca/compress/zlib/compress_zlib.c @@ -0,0 +1,133 @@ +/* + * Copyright (c) 2004-2010 The Trustees of Indiana University. + * All rights reserved. + * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. + * + * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All Rights reserved. + * Copyright (c) 2019 Intel, Inc. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "opal_config.h" + +#include +#include +#include +#include +#if HAVE_UNISTD_H +#include +#endif /* HAVE_UNISTD_H */ +#include + +#include "opal/util/opal_environ.h" +#include "opal/util/output.h" +#include "opal/util/argv.h" +#include "opal/util/opal_environ.h" +#include "opal/util/printf.h" + +#include "opal/constants.h" +#include "opal/util/basename.h" + +#include "opal/mca/compress/compress.h" +#include "opal/mca/compress/base/base.h" + +#include "compress_zlib.h" + +int opal_compress_zlib_module_init(void) +{ + return OPAL_SUCCESS; +} + +int opal_compress_zlib_module_finalize(void) +{ + return OPAL_SUCCESS; +} + +bool opal_compress_zlib_compress_block(uint8_t *inbytes, + size_t inlen, + uint8_t **outbytes, + size_t *olen) +{ + z_stream strm; + size_t len; + uint8_t *tmp; + + if (inlen < opal_compress_base.compress_limit) { + return false; + } + opal_output_verbose(2, opal_compress_base_framework.framework_output, + "COMPRESSING"); + + /* set default output */ + *outbytes = NULL; + *olen = 0; + + /* setup the stream */ + memset (&strm, 0, sizeof (strm)); + deflateInit (&strm, 9); + + /* get an upper bound on the required output storage */ + len = deflateBound(&strm, inlen); + if (NULL == (tmp = (uint8_t*)malloc(len))) { + return false; + } + strm.next_in = inbytes; + strm.avail_in = inlen; + + /* allocating the upper bound guarantees zlib will + * always successfully compress into the available space */ + strm.avail_out = len; + strm.next_out = tmp; + + deflate (&strm, Z_FINISH); + deflateEnd (&strm); + + *outbytes = tmp; + *olen = len - strm.avail_out; + opal_output_verbose(2, opal_compress_base_framework.framework_output, + "\tINSIZE %d OUTSIZE %d", (int)inlen, (int)*olen); + return true; // we did the compression +} + +bool opal_compress_zlib_uncompress_block(uint8_t **outbytes, size_t olen, + uint8_t *inbytes, size_t len) +{ + uint8_t *dest; + z_stream strm; + + /* set 
the default error answer */ + *outbytes = NULL; + opal_output_verbose(2, opal_compress_base_framework.framework_output, "DECOMPRESS"); + + /* setting destination to the fully decompressed size */ + dest = (uint8_t*)malloc(olen); + if (NULL == dest) { + return false; + } + + memset (&strm, 0, sizeof (strm)); + if (Z_OK != inflateInit(&strm)) { + free(dest); + return false; + } + strm.avail_in = len; + strm.next_in = inbytes; + strm.avail_out = olen; + strm.next_out = dest; + + if (Z_STREAM_END != inflate (&strm, Z_FINISH)) { + opal_output(0, "\tDECOMPRESS FAILED: %s", strm.msg); + } + inflateEnd (&strm); + *outbytes = dest; + opal_output_verbose(2, opal_compress_base_framework.framework_output, + "\tINSIZE: %d OUTSIZE %d", (int)len, (int)olen); + return true; +} diff --git a/opal/mca/compress/zlib/compress_zlib.h b/opal/mca/compress/zlib/compress_zlib.h new file mode 100644 index 0000000000..44e08d5408 --- /dev/null +++ b/opal/mca/compress/zlib/compress_zlib.h @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2004-2010 The Trustees of Indiana University. + * All rights reserved. + * Copyright (c) 2019 Intel, Inc. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +/** + * @file + * + * ZLIB COMPRESS component + * + * Uses the zlib library + */ + +#ifndef MCA_COMPRESS_ZLIB_EXPORT_H +#define MCA_COMPRESS_ZLIB_EXPORT_H + +#include "opal_config.h" + +#include "opal/util/output.h" + +#include "opal/mca/mca.h" +#include "opal/mca/compress/compress.h" + +#if defined(c_plusplus) || defined(__cplusplus) +extern "C" { +#endif + + /* + * Local Component structures + */ + struct opal_compress_zlib_component_t { + opal_compress_base_component_t super; /** Base COMPRESS component */ + + }; + typedef struct opal_compress_zlib_component_t opal_compress_zlib_component_t; + extern opal_compress_zlib_component_t mca_compress_zlib_component; + + int opal_compress_zlib_component_query(mca_base_module_t **module, int *priority); + + /* + * Module functions + */ + int opal_compress_zlib_module_init(void); + int opal_compress_zlib_module_finalize(void); + + /* + * Actual functionality + */ + bool opal_compress_zlib_compress_block(uint8_t *inbytes, + size_t inlen, + uint8_t **outbytes, + size_t *olen); + bool opal_compress_zlib_uncompress_block(uint8_t **outbytes, size_t olen, + uint8_t *inbytes, size_t len); + +#if defined(c_plusplus) || defined(__cplusplus) +} +#endif + +#endif /* MCA_COMPRESS_ZLIB_EXPORT_H */ diff --git a/opal/mca/compress/zlib/compress_zlib_component.c b/opal/mca/compress/zlib/compress_zlib_component.c new file mode 100644 index 0000000000..9e2e38b6fb --- /dev/null +++ b/opal/mca/compress/zlib/compress_zlib_component.c @@ -0,0 +1,149 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2010 The Trustees of Indiana University. + * All rights reserved. + * Copyright (c) 2015 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2019 Intel, Inc. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "opal_config.h" + +#include "opal/constants.h" +#include "opal/mca/compress/compress.h" +#include "opal/mca/compress/base/base.h" +#include "compress_zlib.h" + +/* + * Public string for version number + */ +const char *opal_compress_zlib_component_version_string = +"OPAL COMPRESS zlib MCA component version " OPAL_VERSION; + +/* + * Local functionality + */ +static int compress_zlib_register (void); +static int compress_zlib_open(void); +static int compress_zlib_close(void); + +/* + * Instantiate the public struct with all of our public information + * and pointer to our public functions in it + */ +opal_compress_zlib_component_t mca_compress_zlib_component = { + /* First do the base component stuff */ + { + /* Handle the general mca_component_t struct containing + * meta information about the component itself + */ + .base_version = { + OPAL_COMPRESS_BASE_VERSION_2_0_0, + + /* Component name and version */ + .mca_component_name = "zlib", + MCA_BASE_MAKE_VERSION(component, OPAL_MAJOR_VERSION, OPAL_MINOR_VERSION, + OPAL_RELEASE_VERSION), + + /* Component open and close functions */ + .mca_open_component = compress_zlib_open, + .mca_close_component = compress_zlib_close, + .mca_query_component = opal_compress_zlib_component_query, + .mca_register_component_params = compress_zlib_register + }, + .base_data = { + /* The component is checkpoint ready */ + MCA_BASE_METADATA_PARAM_CHECKPOINT + }, + + .verbose = 0, + .output_handle = -1, + } +}; + +/* + * Zlib module + */ +static opal_compress_base_module_t loc_module = { + /** Initialization Function */ + .init = opal_compress_zlib_module_init, + /** Finalization Function */ + .finalize = opal_compress_zlib_module_finalize, + + /** Compress Function */ + .compress_block = opal_compress_zlib_compress_block, + + /** Decompress Function */ + .decompress_block = opal_compress_zlib_uncompress_block, +}; + +static int compress_zlib_register 
(void) +{ + int ret; + + mca_compress_zlib_component.super.priority = 50; + ret = mca_base_component_var_register (&mca_compress_zlib_component.super.base_version, + "priority", "Priority of the COMPRESS zlib component " + "(default: 50)", MCA_BASE_VAR_TYPE_INT, NULL, 0, + MCA_BASE_VAR_FLAG_SETTABLE, + OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_ALL_EQ, + &mca_compress_zlib_component.super.priority); + if (0 > ret) { + return ret; + } + + mca_compress_zlib_component.super.verbose = 0; + ret = mca_base_component_var_register (&mca_compress_zlib_component.super.base_version, + "verbose", + "Verbose level for the COMPRESS zlib component", + MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, + OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL, + &mca_compress_zlib_component.super.verbose); + return (0 > ret) ? ret : OPAL_SUCCESS; +} + +static int compress_zlib_open(void) +{ + /* If there is a custom verbose level for this component then use it + * otherwise take our parent's level and output channel + */ + if ( 0 != mca_compress_zlib_component.super.verbose) { + mca_compress_zlib_component.super.output_handle = opal_output_open(NULL); + opal_output_set_verbosity(mca_compress_zlib_component.super.output_handle, + mca_compress_zlib_component.super.verbose); + } else { + mca_compress_zlib_component.super.output_handle = opal_compress_base_framework.framework_output; + } + + /* + * Debug output + */ + opal_output_verbose(10, mca_compress_zlib_component.super.output_handle, + "compress:zlib: open()"); + opal_output_verbose(20, mca_compress_zlib_component.super.output_handle, + "compress:zlib: open: priority = %d", + mca_compress_zlib_component.super.priority); + opal_output_verbose(20, mca_compress_zlib_component.super.output_handle, + "compress:zlib: open: verbosity = %d", + mca_compress_zlib_component.super.verbose); + return OPAL_SUCCESS; +} + +static int compress_zlib_close(void) +{ + return OPAL_SUCCESS; +} + +int opal_compress_zlib_component_query(mca_base_module_t **module, 
int *priority) +{ + *module = (mca_base_module_t *)&loc_module; + *priority = mca_compress_zlib_component.super.priority; + + return OPAL_SUCCESS; +} + diff --git a/opal/mca/compress/zlib/configure.m4 b/opal/mca/compress/zlib/configure.m4 new file mode 100644 index 0000000000..a3829e508c --- /dev/null +++ b/opal/mca/compress/zlib/configure.m4 @@ -0,0 +1,102 @@ +# -*- shell-script -*- +# +# Copyright (c) 2009-2015 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2013 Los Alamos National Security, LLC. All rights reserved. +# Copyright (c) 2013-2019 Intel, Inc. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# MCA_compress_zlib_CONFIG([action-if-can-compile], +# [action-if-cant-compile]) +# ------------------------------------------------ +AC_DEFUN([MCA_opal_compress_zlib_CONFIG],[ + AC_CONFIG_FILES([opal/mca/compress/zlib/Makefile]) + + OPAL_VAR_SCOPE_PUSH([opal_zlib_dir opal_zlib_libdir opal_zlib_standard_lib_location opal_zlib_standard_header_location opal_check_zlib_save_CPPFLAGS opal_check_zlib_save_LDFLAGS opal_check_zlib_save_LIBS]) + + AC_ARG_WITH([zlib], + [AC_HELP_STRING([--with-zlib=DIR], + [Search for zlib headers and libraries in DIR ])]) + + AC_ARG_WITH([zlib-libdir], + [AC_HELP_STRING([--with-zlib-libdir=DIR], + [Search for zlib libraries in DIR ])]) + + opal_check_zlib_save_CPPFLAGS="$CPPFLAGS" + opal_check_zlib_save_LDFLAGS="$LDFLAGS" + opal_check_zlib_save_LIBS="$LIBS" + + opal_zlib_support=0 + + if test "$with_zlib" != "no"; then + AC_MSG_CHECKING([for zlib in]) + if test ! 
-z "$with_zlib" && test "$with_zlib" != "yes"; then + opal_zlib_dir=$with_zlib + opal_zlib_source=$with_zlib + opal_zlib_standard_header_location=no + opal_zlib_standard_lib_location=no + AS_IF([test -z "$with_zlib_libdir" || test "$with_zlib_libdir" = "yes"], + [if test -d $with_zlib/lib; then + opal_zlib_libdir=$with_zlib/lib + elif test -d $with_zlib/lib64; then + opal_zlib_libdir=$with_zlib/lib64 + else + AC_MSG_RESULT([Could not find $with_zlib/lib or $with_zlib/lib64]) + AC_MSG_ERROR([Can not continue]) + fi + AC_MSG_RESULT([$opal_zlib_dir and $opal_zlib_libdir])], + [AC_MSG_RESULT([$with_zlib_libdir])]) + else + AC_MSG_RESULT([(default search paths)]) + opal_zlib_source=standard + opal_zlib_standard_header_location=yes + opal_zlib_standard_lib_location=yes + fi + AS_IF([test ! -z "$with_zlib_libdir" && test "$with_zlib_libdir" != "yes"], + [opal_zlib_libdir="$with_zlib_libdir" + opal_zlib_standard_lib_location=no]) + + OPAL_CHECK_PACKAGE([opal_zlib], + [zlib.h], + [z], + [deflate], + [-lz], + [$opal_zlib_dir], + [$opal_zlib_libdir], + [opal_zlib_support=1], + [opal_zlib_support=0]) + fi + + if test ! 
-z "$with_zlib" && test "$with_zlib" != "no" && test "$opal_zlib_support" != "1"; then + AC_MSG_WARN([ZLIB SUPPORT REQUESTED AND NOT FOUND]) + AC_MSG_ERROR([CANNOT CONTINUE]) + fi + + AC_MSG_CHECKING([will zlib support be built]) + if test "$opal_zlib_support" != "1"; then + AC_MSG_RESULT([no]) + else + AC_MSG_RESULT([yes]) + fi + + CPPFLAGS="$opal_check_zlib_save_CPPFLAGS" + LDFLAGS="$opal_check_zlib_save_LDFLAGS" + LIBS="$opal_check_zlib_save_LIBS" + + AS_IF([test "$opal_zlib_support" = "1"], + [$1 + OPAL_SUMMARY_ADD([[External Packages]],[[ZLIB]], [opal_zlib], [yes ($opal_zlib_source)])], + [$2]) + + # substitute in the things needed to build zlib + AC_SUBST([compress_zlib_CFLAGS]) + AC_SUBST([compress_zlib_CPPFLAGS]) + AC_SUBST([compress_zlib_LDFLAGS]) + AC_SUBST([compress_zlib_LIBS]) + + OPAL_VAR_SCOPE_POP +])dnl diff --git a/orte/mca/grpcomm/brucks/owner.txt b/opal/mca/compress/zlib/owner.txt similarity index 79% rename from orte/mca/grpcomm/brucks/owner.txt rename to opal/mca/compress/zlib/owner.txt index 4ad6f408ca..b1efc765f0 100644 --- a/orte/mca/grpcomm/brucks/owner.txt +++ b/opal/mca/compress/zlib/owner.txt @@ -3,5 +3,5 @@ # owner: institution that is responsible for this package # status: e.g. active, maintenance, unmaintained # -owner: INTEL -status: maintenance +owner:project +status:maintenance diff --git a/opal/runtime/opal_init.c b/opal/runtime/opal_init.c index ea723d7d68..1c957ef23a 100644 --- a/opal/runtime/opal_init.c +++ b/opal/runtime/opal_init.c @@ -15,7 +15,7 @@ * Copyright (c) 2009 Oak Ridge National Labs. All rights reserved. * Copyright (c) 2010-2015 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2017 Amazon.com, Inc. or its affiliates. 
@@ -61,9 +61,7 @@ #include "opal/mca/if/base/base.h" #include "opal/dss/dss.h" #include "opal/mca/shmem/base/base.h" -#if OPAL_ENABLE_FT_CR == 1 #include "opal/mca/compress/base/base.h" -#endif #include "opal/threads/threads.h" #include "opal/threads/tsd.h" @@ -524,7 +522,8 @@ opal_init_util(int* pargc, char*** pargv) static mca_base_framework_t *opal_init_frameworks[] = { &opal_hwloc_base_framework, &opal_memcpy_base_framework, &opal_memchecker_base_framework, &opal_backtrace_base_framework, &opal_timer_base_framework, &opal_event_base_framework, - &opal_shmem_base_framework, &opal_reachable_base_framework, NULL, + &opal_shmem_base_framework, &opal_reachable_base_framework, &opal_compress_base_framework, + NULL, }; int @@ -585,5 +584,12 @@ opal_init(int* pargc, char*** pargv) return opal_init_error ("opal_reachable_base_select", ret); } + /* Initialize compress framework */ + if (OPAL_SUCCESS != (ret = opal_compress_base_select())) { + return opal_init_error ("opal_compress_base_select", ret); + } + + opal_finalize_pop_domain (); + return OPAL_SUCCESS; } diff --git a/orte/mca/errmgr/default_orted/errmgr_default_orted.c b/orte/mca/errmgr/default_orted/errmgr_default_orted.c index f423db57ef..5fe4ca1793 100644 --- a/orte/mca/errmgr/default_orted/errmgr_default_orted.c +++ b/orte/mca/errmgr/default_orted/errmgr_default_orted.c @@ -8,7 +8,7 @@ * reserved. * Copyright (c) 2011-2013 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * @@ -427,56 +427,6 @@ static void proc_errors(int fd, short args, void *cbdata) "%s errmgr:default:orted daemon %s exited", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(proc))); - /* if we are using static ports, then it is possible that the HNP - * will not see this termination. 
So if the HNP didn't order us - * to terminate, then we should ensure it knows */ - if (orte_static_ports && !orte_orteds_term_ordered) { - /* send an alert to the HNP */ - alert = OBJ_NEW(opal_buffer_t); - /* pack update state command */ - cmd = ORTE_PLM_UPDATE_PROC_STATE; - if (ORTE_SUCCESS != (rc = opal_dss.pack(alert, &cmd, 1, ORTE_PLM_CMD))) { - ORTE_ERROR_LOG(rc); - return; - } - /* get the proc_t */ - if (NULL == (child = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, proc->vpid))) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE); - goto cleanup; - } - /* set the exit code to reflect the problem */ - child->exit_code = ORTE_ERR_COMM_FAILURE; - /* pack only the data for this daemon - have to start with the jobid - * so the receiver can unpack it correctly - */ - if (ORTE_SUCCESS != (rc = opal_dss.pack(alert, &proc->jobid, 1, ORTE_JOBID))) { - ORTE_ERROR_LOG(rc); - return; - } - - /* now pack the daemon's info */ - if (ORTE_SUCCESS != (rc = pack_state_for_proc(alert, child))) { - ORTE_ERROR_LOG(rc); - return; - } - /* send it */ - OPAL_OUTPUT_VERBOSE((5, orte_errmgr_base_framework.framework_output, - "%s errmgr:default_orted reporting lost connection to daemon %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(proc))); - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - ORTE_PROC_MY_HNP, alert, - ORTE_RML_TAG_PLM, - orte_rml_send_callback, NULL))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(alert); - } - /* mark that we notified the HNP for this job so we don't do it again */ - orte_set_attribute(&jdata->attributes, ORTE_JOB_FAIL_NOTIFIED, ORTE_ATTR_LOCAL, NULL, OPAL_BOOL); - /* continue on */ - goto cleanup; - } if (orte_orteds_term_ordered) { /* are any of my children still alive */ diff --git a/orte/mca/ess/base/ess_base_std_orted.c b/orte/mca/ess/base/ess_base_std_orted.c index a55c511587..172ac7212e 100644 --- a/orte/mca/ess/base/ess_base_std_orted.c +++ 
b/orte/mca/ess/base/ess_base_std_orted.c @@ -2,7 +2,7 @@ * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2018 The University of Tennessee and The University + * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -12,9 +12,9 @@ * Copyright (c) 2009 Institut National de Recherche en Informatique * et Automatique. All rights reserved. * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2011-2019 Los Alamos National Security, LLC. All rights + * Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * @@ -57,7 +57,6 @@ #include "orte/mca/iof/base/base.h" #include "orte/mca/plm/base/base.h" #include "orte/mca/odls/base/base.h" -#include "orte/mca/regx/base/base.h" #include "orte/mca/errmgr/errmgr.h" #include "orte/mca/rmaps/base/base.h" #include "orte/mca/filem/base/base.h" @@ -515,17 +514,6 @@ int orte_ess_base_orted_setup(void) error = "orte_rmaps_base_select"; goto error; } - if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_regx_base_framework, 0))) { - ORTE_ERROR_LOG(ret); - error = "orte_regx_base_open"; - goto error; - } - if (ORTE_SUCCESS != (ret = orte_regx_base_select())) { - ORTE_ERROR_LOG(ret); - error = "orte_regx_base_select"; - goto error; - } - /* if a topology file was given, then the rmaps framework open * will have reset our topology. 
Ensure we always get the right @@ -542,46 +530,6 @@ int orte_ess_base_orted_setup(void) opal_dss.dump(0, opal_hwloc_topology, OPAL_HWLOC_TOPO); } - /* if we were given the host list, then we need to setup - * the daemon info so the RML can function properly - * without requiring a wireup stage. This must be done - * after we enable_comm as that function determines our - * own port, which we need in order to construct the nidmap - */ - if (NULL != orte_node_regex) { - if (ORTE_SUCCESS != (ret = orte_regx.nidmap_parse(orte_node_regex))) { - ORTE_ERROR_LOG(ret); - error = "construct nidmap"; - goto error; - } - /* be sure to update the routing tree so any tree spawn operation - * properly gets the number of children underneath us */ - orte_routed.update_routing_plan(NULL); - } - - if (orte_static_ports || orte_fwd_mpirun_port) { - if (NULL == orte_node_regex) { - /* we didn't get the node info */ - error = "cannot construct daemon map for static ports - no node map info"; - goto error; - } - /* extract the node info from the environment and - * build a nidmap from it - this will update the - * routing plan as well - */ - if (ORTE_SUCCESS != (ret = orte_regx.build_daemon_nidmap())) { - ORTE_ERROR_LOG(ret); - error = "construct daemon map from static ports"; - goto error; - } - /* be sure to update the routing tree so the initial "phone home" - * to mpirun goes through the tree if static ports were enabled - */ - orte_routed.update_routing_plan(NULL); - /* routing can be enabled */ - orte_routed_base.routing_enabled = true; - } - /* Now provide a chance for the PLM * to perform any module-specific init functions. 
This * needs to occur AFTER the communications are setup @@ -669,20 +617,15 @@ int orte_ess_base_orted_finalize(void) (void) mca_base_framework_close(&orte_filem_base_framework); (void) mca_base_framework_close(&orte_grpcomm_base_framework); (void) mca_base_framework_close(&orte_iof_base_framework); - /* first stage shutdown of the errmgr, deregister the handler but keep - * the required facilities until the rml and oob are offline */ - orte_errmgr.finalize(); + (void) mca_base_framework_close(&orte_errmgr_base_framework); (void) mca_base_framework_close(&orte_plm_base_framework); /* make sure our local procs are dead */ orte_odls.kill_local_procs(NULL); - (void) mca_base_framework_close(&orte_regx_base_framework); - (void) mca_base_framework_close(&orte_rmaps_base_framework); (void) mca_base_framework_close(&orte_rtc_base_framework); (void) mca_base_framework_close(&orte_odls_base_framework); (void) mca_base_framework_close(&orte_routed_base_framework); (void) mca_base_framework_close(&orte_rml_base_framework); (void) mca_base_framework_close(&orte_oob_base_framework); - (void) mca_base_framework_close(&orte_errmgr_base_framework); (void) mca_base_framework_close(&orte_state_base_framework); /* remove our use of the session directory tree */ orte_session_dir_finalize(ORTE_PROC_MY_NAME); diff --git a/orte/mca/ess/hnp/ess_hnp_module.c b/orte/mca/ess/hnp/ess_hnp_module.c index e5b844d475..6e56d69ff5 100644 --- a/orte/mca/ess/hnp/ess_hnp_module.c +++ b/orte/mca/ess/hnp/ess_hnp_module.c @@ -14,7 +14,7 @@ * Copyright (c) 2011-2014 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011-2017 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2017-2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -66,7 +66,6 @@ #include "orte/mca/grpcomm/base/base.h" #include "orte/mca/iof/base/base.h" #include "orte/mca/ras/base/base.h" -#include "orte/mca/regx/base/base.h" #include "orte/mca/plm/base/base.h" #include "orte/mca/plm/plm.h" #include "orte/mca/odls/base/base.h" @@ -555,16 +554,6 @@ static int rte_init(void) error = "orte_rmaps_base_find_available"; goto error; } - if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_regx_base_framework, 0))) { - ORTE_ERROR_LOG(ret); - error = "orte_regx_base_open"; - goto error; - } - if (ORTE_SUCCESS != (ret = orte_regx_base_select())) { - ORTE_ERROR_LOG(ret); - error = "orte_regx_base_select"; - goto error; - } /* if a topology file was given, then the rmaps framework open * will have reset our topology. Ensure we always get the right diff --git a/orte/mca/grpcomm/base/grpcomm_base_stubs.c b/orte/mca/grpcomm/base/grpcomm_base_stubs.c index 91fbb1ef5a..a27e8603e9 100644 --- a/orte/mca/grpcomm/base/grpcomm_base_stubs.c +++ b/orte/mca/grpcomm/base/grpcomm_base_stubs.c @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2011-2016 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2016-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2019 Intel, Inc. All rights reserved. * Copyright (c) 2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -33,7 +33,7 @@ #include "opal/dss/dss.h" -#include "orte/util/compress.h" +#include "opal/mca/compress/compress.h" #include "orte/util/proc_info.h" #include "orte/util/error_strings.h" #include "orte/mca/errmgr/errmgr.h" @@ -506,8 +506,8 @@ static int pack_xcast(orte_grpcomm_signature_t *sig, } /* see if we want to compress this message */ - if (orte_util_compress_block((uint8_t*)data.base_ptr, data.bytes_used, - &cmpdata, &cmplen)) { + if (opal_compress.compress_block((uint8_t*)data.base_ptr, data.bytes_used, + &cmpdata, &cmplen)) { /* the data was compressed - mark that we compressed it */ flag = 1; if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &flag, 1, OPAL_INT8))) { diff --git a/orte/mca/grpcomm/brucks/Makefile.am b/orte/mca/grpcomm/brucks/Makefile.am deleted file mode 100644 index 880ff3f01f..0000000000 --- a/orte/mca/grpcomm/brucks/Makefile.am +++ /dev/null @@ -1,41 +0,0 @@ -# -# Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2013 Los Alamos National Security, LLC. All rights -# reserved. -# Copyright (c) 2014-2018 Intel, Inc. All rights reserved. -# Copyright (c) 2017 IBM Corporation. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -AM_CPPFLAGS = $(grpcomm_brucks_CPPFLAGS) - -sources = \ - grpcomm_brucks.h \ - grpcomm_brucks_module.c \ - grpcomm_brucks_component.c - -# Make the output library in this brucksory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). 
- -if MCA_BUILD_orte_grpcomm_brucks_DSO -component_noinst = -component_install = mca_grpcomm_brucks.la -else -component_noinst = libmca_grpcomm_brucks.la -component_install = -endif - -mcacomponentdir = $(ortelibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_grpcomm_brucks_la_SOURCES = $(sources) -mca_grpcomm_brucks_la_LDFLAGS = -module -avoid-version -mca_grpcomm_brucks_la_LIBADD = $(top_builddir)/orte/lib@ORTE_LIB_PREFIX@open-rte.la - -noinst_LTLIBRARIES = $(component_noinst) -libmca_grpcomm_brucks_la_SOURCES =$(sources) -libmca_grpcomm_brucks_la_LDFLAGS = -module -avoid-version diff --git a/orte/mca/grpcomm/brucks/grpcomm_brucks.h b/orte/mca/grpcomm/brucks/grpcomm_brucks.h deleted file mode 100644 index 063c81c3f8..0000000000 --- a/orte/mca/grpcomm/brucks/grpcomm_brucks.h +++ /dev/null @@ -1,31 +0,0 @@ -/* -*- C -*- - * - * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - */ -#ifndef GRPCOMM_BRUCKS_H -#define GRPCOMM_BRUCKS_H - -#include "orte_config.h" - - -#include "orte/mca/grpcomm/grpcomm.h" - -BEGIN_C_DECLS - -/* - * Grpcomm interfaces - */ - -ORTE_MODULE_DECLSPEC extern orte_grpcomm_base_component_t mca_grpcomm_brucks_component; -extern orte_grpcomm_base_module_t orte_grpcomm_brucks_module; - -END_C_DECLS - -#endif diff --git a/orte/mca/grpcomm/brucks/grpcomm_brucks_component.c b/orte/mca/grpcomm/brucks/grpcomm_brucks_component.c deleted file mode 100644 index 705ea3d3c9..0000000000 --- a/orte/mca/grpcomm/brucks/grpcomm_brucks_component.c +++ /dev/null @@ -1,84 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" -#include "orte/constants.h" - -#include "orte/mca/mca.h" -#include "opal/runtime/opal_params.h" - -#include "orte/util/proc_info.h" - -#include "grpcomm_brucks.h" - -static int my_priority=5; -static int brucks_open(void); -static int brucks_close(void); -static int brucks_query(mca_base_module_t **module, int *priority); -static int brucks_register(void); - -/* - * Struct of function pointers that need to be initialized - */ -orte_grpcomm_base_component_t mca_grpcomm_brucks_component = { - .base_version = { - ORTE_GRPCOMM_BASE_VERSION_3_0_0, - - .mca_component_name = "brucks", - MCA_BASE_MAKE_VERSION(component, ORTE_MAJOR_VERSION, ORTE_MINOR_VERSION, - ORTE_RELEASE_VERSION), - .mca_open_component = brucks_open, - .mca_close_component = brucks_close, - .mca_query_component = brucks_query, - .mca_register_component_params = brucks_register, - }, - .base_data = { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, -}; - -static int brucks_register(void) -{ - mca_base_component_t *c = &mca_grpcomm_brucks_component.base_version; - - /* make the priority adjustable so users can select - * brucks for use by apps without affecting daemons - */ - my_priority = 50; - (void) mca_base_component_var_register(c, "priority", - "Priority of the grpcomm brucks component", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &my_priority); - return ORTE_SUCCESS; -} - -/* Open the component */ -static int brucks_open(void) -{ - return ORTE_SUCCESS; -} - -static int brucks_close(void) -{ - return ORTE_SUCCESS; -} - -static int brucks_query(mca_base_module_t **module, int *priority) -{ - *priority = my_priority; - *module = (mca_base_module_t *)&orte_grpcomm_brucks_module; - return ORTE_SUCCESS; -} diff --git a/orte/mca/grpcomm/brucks/grpcomm_brucks_module.c b/orte/mca/grpcomm/brucks/grpcomm_brucks_module.c 
deleted file mode 100644 index 48c63af683..0000000000 --- a/orte/mca/grpcomm/brucks/grpcomm_brucks_module.c +++ /dev/null @@ -1,388 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2007 The Trustees of Indiana University. - * All rights reserved. - * Copyright (c) 2011-2015 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2011-2016 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. - * Copyright (c) 2014 Mellanox Technologies, Inc. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" -#include "orte/constants.h" -#include "orte/types.h" -#include "orte/runtime/orte_wait.h" - -#include -#include - -#include "opal/dss/dss.h" - -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/rml/rml.h" -#include "orte/util/name_fns.h" -#include "orte/util/proc_info.h" - -#include "orte/mca/grpcomm/base/base.h" -#include "grpcomm_brucks.h" - - -/* Static API's */ -static int init(void); -static void finalize(void); -static int allgather(orte_grpcomm_coll_t *coll, - opal_buffer_t *buf); -static void brucks_allgather_process_data(orte_grpcomm_coll_t *coll, uint32_t distance); -static int brucks_allgather_send_dist(orte_grpcomm_coll_t *coll, orte_process_name_t *peer, uint32_t distance); -static void brucks_allgather_recv_dist(int status, orte_process_name_t* sender, - opal_buffer_t* buffer, orte_rml_tag_t tag, - void* cbdata); -static int brucks_finalize_coll(orte_grpcomm_coll_t *coll, int ret); - -/* Module def */ -orte_grpcomm_base_module_t orte_grpcomm_brucks_module = { - init, - finalize, - NULL, - allgather -}; - -/** - * Initialize the module - */ -static int init(void) -{ - /* setup recv for distance data */ - orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, - ORTE_RML_TAG_ALLGATHER_BRUCKS, - ORTE_RML_PERSISTENT, - brucks_allgather_recv_dist, NULL); - return OPAL_SUCCESS; -} - 
-/** - * Finalize the module - */ -static void finalize(void) -{ - /* cancel the recv */ - orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_ALLGATHER_BRUCKS); -} - -static int allgather(orte_grpcomm_coll_t *coll, - opal_buffer_t *sendbuf) -{ - OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output, - "%s grpcomm:coll:brucks algo employed for %d processes", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (int)coll->ndmns)); - /* get my own rank */ - coll->my_rank = ORTE_VPID_INVALID; - for (orte_vpid_t nv = 0; nv < coll->ndmns; nv++) { - if (coll->dmns[nv] == ORTE_PROC_MY_NAME->vpid) { - coll->my_rank = nv; - break; - } - } - - /* check for bozo case */ - if (ORTE_VPID_INVALID == coll->my_rank) { - OPAL_OUTPUT((orte_grpcomm_base_framework.framework_output, - "Peer not found")); - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - brucks_finalize_coll(coll, ORTE_ERR_NOT_FOUND); - return ORTE_ERR_NOT_FOUND; - } - - /* record that we contributed */ - coll->nreported = 1; - - /* mark local data received */ - if (coll->ndmns > 1) { - opal_bitmap_init (&coll->distance_mask_recv, (uint32_t) log2 (coll->ndmns) + 1); - } - - /* start by seeding the collection with our own data */ - opal_dss.copy_payload(&coll->bucket, sendbuf); - - /* process data */ - brucks_allgather_process_data (coll, 0); - - return ORTE_SUCCESS; -} - -static int brucks_allgather_send_dist(orte_grpcomm_coll_t *coll, orte_process_name_t *peer, uint32_t distance) { - opal_buffer_t *send_buf; - int rc; - - send_buf = OBJ_NEW(opal_buffer_t); - - /* pack the signature */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(send_buf, &coll->sig, 1, ORTE_SIGNATURE))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(send_buf); - return rc; - } - /* pack the current distance */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(send_buf, &distance, 1, OPAL_INT32))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(send_buf); - return rc; - } - /* pack the number of daemons included in the payload */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(send_buf, 
&coll->nreported, 1, OPAL_SIZE))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(send_buf); - return rc; - } - /* pack the data */ - if (OPAL_SUCCESS != (rc = opal_dss.copy_payload(send_buf, &coll->bucket))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(send_buf); - return rc; - } - - OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output, - "%s grpcomm:coll:brucks SENDING TO %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(peer))); - - - if (0 > (rc = orte_rml.send_buffer_nb(peer, send_buf, - ORTE_RML_TAG_ALLGATHER_BRUCKS, - orte_rml_send_callback, NULL))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(send_buf); - return rc; - }; - - return ORTE_SUCCESS; -} - -static int brucks_allgather_process_buffered (orte_grpcomm_coll_t *coll, uint32_t distance) { - opal_buffer_t *buffer; - size_t nreceived; - int32_t cnt = 1; - int rc; - - /* check whether data for next distance is available*/ - if (NULL == coll->buffers || NULL == coll->buffers[distance]) { - return 0; - } - - buffer = coll->buffers[distance]; - coll->buffers[distance] = NULL; - - OPAL_OUTPUT_VERBOSE((80, orte_grpcomm_base_framework.framework_output, - "%s grpcomm:coll:brucks %u distance data found", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), distance)); - rc = opal_dss.unpack (buffer, &nreceived, &cnt, OPAL_SIZE); - if (OPAL_SUCCESS != rc) { - ORTE_ERROR_LOG(rc); - brucks_finalize_coll(coll, rc); - return rc; - } - - if (OPAL_SUCCESS != (rc = opal_dss.copy_payload(&coll->bucket, buffer))) { - ORTE_ERROR_LOG(rc); - brucks_finalize_coll(coll, rc); - return rc; - } - - coll->nreported += nreceived; - orte_grpcomm_base_mark_distance_recv (coll, distance); - OBJ_RELEASE(buffer); - - return 1; -} - -static void brucks_allgather_process_data(orte_grpcomm_coll_t *coll, uint32_t distance) { - /* Communication step: - At every step i, rank r: - - doubles the distance - - sends message containing all data collected so far to rank r - distance - - receives message containing all data collected so far from rank (r + 
distance) - */ - uint32_t log2ndmns = (uint32_t) log2 (coll->ndmns); - uint32_t last_round; - orte_process_name_t peer; - orte_vpid_t nv; - int rc; - - /* NTH: calculate in which round we should send the final data. this is the first - * round in which we have data from at least (coll->ndmns - (1 << log2ndmns)) - * daemons. alternatively we could just send when distance reaches log2ndmns but - * that could end up sending more data than needed */ - last_round = (uint32_t) ceil (log2 ((double) (coll->ndmns - (1 << log2ndmns)))); - - peer.jobid = ORTE_PROC_MY_NAME->jobid; - - while (distance < log2ndmns) { - OPAL_OUTPUT_VERBOSE((80, orte_grpcomm_base_framework.framework_output, - "%s grpcomm:coll:brucks process distance %u)", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), distance)); - - /* first send my current contents */ - nv = (coll->ndmns + coll->my_rank - (1 << distance)) % coll->ndmns; - peer.vpid = coll->dmns[nv]; - - brucks_allgather_send_dist(coll, &peer, distance); - - if (distance == last_round) { - /* have enough data to send the final round now */ - nv = (coll->ndmns + coll->my_rank - (1 << log2ndmns)) % coll->ndmns; - peer.vpid = coll->dmns[nv]; - brucks_allgather_send_dist(coll, &peer, log2ndmns); - } - - rc = brucks_allgather_process_buffered (coll, distance); - if (!rc) { - break; - } else if (rc < 0) { - return; - } - - ++distance; - } - - if (distance == log2ndmns) { - if (distance == last_round) { - /* need to send the final round now */ - nv = (coll->ndmns + coll->my_rank - (1 << log2ndmns)) % coll->ndmns; - peer.vpid = coll->dmns[nv]; - brucks_allgather_send_dist(coll, &peer, log2ndmns); - } - - /* check if the final message is already queued */ - rc = brucks_allgather_process_buffered (coll, distance); - if (rc < 0) { - return; - } - } - - OPAL_OUTPUT_VERBOSE((80, orte_grpcomm_base_framework.framework_output, - "%s grpcomm:coll:brucks reported %lu process from %lu", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (unsigned long)coll->nreported, - (unsigned 
long)coll->ndmns)); - - /* if we are done, then complete things. we may get data from more daemons than expected */ - if (coll->nreported >= coll->ndmns){ - brucks_finalize_coll(coll, ORTE_SUCCESS); - } -} - -static void brucks_allgather_recv_dist(int status, orte_process_name_t* sender, - opal_buffer_t* buffer, orte_rml_tag_t tag, - void* cbdata) -{ - int32_t cnt; - int rc; - orte_grpcomm_signature_t *sig; - orte_grpcomm_coll_t *coll; - uint32_t distance; - - OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output, - "%s grpcomm:coll:brucks RECEIVING FROM %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(sender))); - - /* unpack the signature */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &sig, &cnt, ORTE_SIGNATURE))) { - ORTE_ERROR_LOG(rc); - return; - } - - /* check for the tracker and create it if not found */ - if (NULL == (coll = orte_grpcomm_base_get_tracker(sig, true))) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - OBJ_RELEASE(sig); - return; - } - /* unpack the distance */ - distance = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &distance, &cnt, OPAL_INT32))) { - OBJ_RELEASE(sig); - ORTE_ERROR_LOG(rc); - brucks_finalize_coll(coll, rc); - return; - } - assert(0 == orte_grpcomm_base_check_distance_recv(coll, distance)); - - /* Check whether we can process next distance */ - if (coll->nreported && (!distance || orte_grpcomm_base_check_distance_recv(coll, distance - 1))) { - size_t nreceived; - OPAL_OUTPUT_VERBOSE((80, orte_grpcomm_base_framework.framework_output, - "%s grpcomm:coll:brucks data from %d distance received, " - "Process the next distance.", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), distance)); - /* capture any provided content */ - rc = opal_dss.unpack (buffer, &nreceived, &cnt, OPAL_SIZE); - if (OPAL_SUCCESS != rc) { - OBJ_RELEASE(sig); - ORTE_ERROR_LOG(rc); - brucks_finalize_coll(coll, rc); - return; - } - if (OPAL_SUCCESS != (rc = opal_dss.copy_payload(&coll->bucket, buffer))) { - OBJ_RELEASE(sig); 
- ORTE_ERROR_LOG(rc); - brucks_finalize_coll(coll, rc); - return; - } - coll->nreported += nreceived; - orte_grpcomm_base_mark_distance_recv(coll, distance); - brucks_allgather_process_data(coll, distance + 1); - } else { - OPAL_OUTPUT_VERBOSE((80, orte_grpcomm_base_framework.framework_output, - "%s grpcomm:coll:brucks data from %d distance received, " - "still waiting for data.", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), distance)); - if (NULL == coll->buffers) { - if (NULL == (coll->buffers = (opal_buffer_t **) calloc ((uint32_t) log2 (coll->ndmns) + 1, sizeof(opal_buffer_t *)))) { - rc = OPAL_ERR_OUT_OF_RESOURCE; - OBJ_RELEASE(sig); - ORTE_ERROR_LOG(rc); - brucks_finalize_coll(coll, rc); - return; - } - } - if (NULL == (coll->buffers[distance] = OBJ_NEW(opal_buffer_t))) { - rc = OPAL_ERR_OUT_OF_RESOURCE; - OBJ_RELEASE(sig); - ORTE_ERROR_LOG(rc); - brucks_finalize_coll(coll, rc); - return; - } - if (OPAL_SUCCESS != (rc = opal_dss.copy_payload(coll->buffers[distance], buffer))) { - OBJ_RELEASE(sig); - ORTE_ERROR_LOG(rc); - brucks_finalize_coll(coll, rc); - return; - } - } - - OBJ_RELEASE(sig); -} - -static int brucks_finalize_coll(orte_grpcomm_coll_t *coll, int ret) -{ - OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output, - "%s grpcomm:coll:brucks declared collective complete", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - - /* execute the callback */ - if (NULL != coll->cbfunc) { - coll->cbfunc(ret, &coll->bucket, coll->cbdata); - } - - opal_list_remove_item(&orte_grpcomm_base.ongoing, &coll->super); - - return ORTE_SUCCESS; -} diff --git a/orte/mca/grpcomm/direct/grpcomm_direct.c b/orte/mca/grpcomm/direct/grpcomm_direct.c index 530e2ced01..ce95319dbc 100644 --- a/orte/mca/grpcomm/direct/grpcomm_direct.c +++ b/orte/mca/grpcomm/direct/grpcomm_direct.c @@ -5,7 +5,7 @@ * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011-2013 Los Alamos National Security, LLC. All * rights reserved. - * Copyright (c) 2014-2018 Intel, Inc. 
All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * Copyright (c) 2014-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -24,15 +24,15 @@ #include "opal/dss/dss.h" #include "opal/class/opal_list.h" #include "opal/mca/pmix/pmix.h" +#include "opal/mca/compress/compress.h" #include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/regx/regx.h" #include "orte/mca/rml/base/base.h" #include "orte/mca/rml/base/rml_contact.h" #include "orte/mca/routed/base/base.h" #include "orte/mca/state/state.h" -#include "orte/util/compress.h" #include "orte/util/name_fns.h" +#include "orte/util/nidmap.h" #include "orte/util/proc_info.h" #include "orte/mca/grpcomm/base/base.h" @@ -271,7 +271,7 @@ static void xcast_recv(int status, orte_process_name_t* sender, opal_list_t coll; orte_grpcomm_signature_t *sig; orte_rml_tag_t tag; - char *rtmod, *nidmap; + char *rtmod; size_t inlen, cmplen; uint8_t *packed_data, *cmpdata; int32_t nvals, i; @@ -336,7 +336,7 @@ static void xcast_recv(int status, orte_process_name_t* sender, return; } /* decompress the data */ - if (orte_util_uncompress_block(&cmpdata, cmplen, + if (opal_compress.decompress_block(&cmpdata, cmplen, packed_data, inlen)) { /* the data has been uncompressed */ opal_dss.load(&datbuf, cmpdata, cmplen); @@ -409,38 +409,17 @@ static void xcast_recv(int status, orte_process_name_t* sender, ORTE_ERROR_LOG(ret); goto relay; } - /* unpack the nidmap string - may be NULL */ - cnt = 1; - if (OPAL_SUCCESS != (ret = opal_dss.unpack(data, &nidmap, &cnt, OPAL_STRING))) { - ORTE_ERROR_LOG(ret); - goto relay; - } - if (NULL != nidmap) { - if (ORTE_SUCCESS != (ret = orte_regx.nidmap_parse(nidmap))) { - ORTE_ERROR_LOG(ret); - goto relay; - } - free(nidmap); - } - /* see if they included info on node capabilities */ + /* unpack flag indicating if nidmap included */ cnt = 1; if (OPAL_SUCCESS != (ret = opal_dss.unpack(data, &flag, &cnt, OPAL_INT8))) { 
ORTE_ERROR_LOG(ret); goto relay; } - if (0 != flag) { - /* update our local nidmap, if required - the decode function - * knows what to do - */ - OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output, - "%s grpcomm:direct:xcast updating daemon nidmap", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - - if (ORTE_SUCCESS != (ret = orte_regx.decode_daemon_nodemap(data))) { + if (1 == flag) { + if (ORTE_SUCCESS != (ret = orte_util_decode_nidmap(data))) { ORTE_ERROR_LOG(ret); goto relay; } - if (!ORTE_PROC_IS_HNP) { /* update the routing plan - the HNP already did * it when it computed the VM, so don't waste time @@ -450,7 +429,7 @@ static void xcast_recv(int status, orte_process_name_t* sender, /* routing is now possible */ orte_routed_base.routing_enabled = true; - /* unpack the byte object */ + /* unpack the wireup byte object */ cnt=1; if (ORTE_SUCCESS != (ret = opal_dss.unpack(data, &bo, &cnt, OPAL_BYTE_OBJECT))) { ORTE_ERROR_LOG(ret); diff --git a/orte/mca/grpcomm/rcd/Makefile.am b/orte/mca/grpcomm/rcd/Makefile.am deleted file mode 100644 index 77bc51510d..0000000000 --- a/orte/mca/grpcomm/rcd/Makefile.am +++ /dev/null @@ -1,41 +0,0 @@ -# -# Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2013 Los Alamos National Security, LLC. All rights -# reserved. -# Copyright (c) 2014-2018 Intel, Inc. All rights reserved. -# Copyright (c) 2017 IBM Corporation. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -AM_CPPFLAGS = $(grpcomm_rcd_CPPFLAGS) - -sources = \ - grpcomm_rcd.h \ - grpcomm_rcd.c \ - grpcomm_rcd_component.c - -# Make the output library in this rcdory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). 
- -if MCA_BUILD_orte_grpcomm_rcd_DSO -component_noinst = -component_install = mca_grpcomm_rcd.la -else -component_noinst = libmca_grpcomm_rcd.la -component_install = -endif - -mcacomponentdir = $(ortelibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_grpcomm_rcd_la_SOURCES = $(sources) -mca_grpcomm_rcd_la_LDFLAGS = -module -avoid-version -mca_grpcomm_rcd_la_LIBADD = $(top_builddir)/orte/lib@ORTE_LIB_PREFIX@open-rte.la - -noinst_LTLIBRARIES = $(component_noinst) -libmca_grpcomm_rcd_la_SOURCES =$(sources) -libmca_grpcomm_rcd_la_LDFLAGS = -module -avoid-version diff --git a/orte/mca/grpcomm/rcd/grpcomm_rcd.c b/orte/mca/grpcomm/rcd/grpcomm_rcd.c deleted file mode 100644 index 585c1cd713..0000000000 --- a/orte/mca/grpcomm/rcd/grpcomm_rcd.c +++ /dev/null @@ -1,329 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2007 The Trustees of Indiana University. - * All rights reserved. - * Copyright (c) 2011-2015 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2011-2016 Los Alamos National Security, LLC. All - * rights reserved. - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. - * Copyright (c) 2014 Mellanox Technologies, Inc. - * All rights reserved. - * Copyright (c) 2014 Research Organization for Information Science - * and Technology (RIST). All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" -#include "orte/constants.h" -#include "orte/types.h" -#include "orte/runtime/orte_wait.h" - -#include -#include - -#include "opal/dss/dss.h" - -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/rml/rml.h" -#include "orte/util/name_fns.h" -#include "orte/util/proc_info.h" - -#include "orte/mca/grpcomm/base/base.h" -#include "grpcomm_rcd.h" - - -/* Static API's */ -static int init(void); -static void finalize(void); -static int allgather(orte_grpcomm_coll_t *coll, - opal_buffer_t *buf); -static void rcd_allgather_process_data(orte_grpcomm_coll_t *coll, uint32_t distance); -static int rcd_allgather_send_dist(orte_grpcomm_coll_t *coll, orte_process_name_t *peer, uint32_t distance); -static void rcd_allgather_recv_dist(int status, orte_process_name_t* sender, - opal_buffer_t* buffer, orte_rml_tag_t tag, - void* cbdata); -static int rcd_finalize_coll(orte_grpcomm_coll_t *coll, int ret); - -/* Module def */ -orte_grpcomm_base_module_t orte_grpcomm_rcd_module = { - init, - finalize, - NULL, - allgather -}; - -/** - * Initialize the module - */ -static int init(void) -{ - /* setup recv for distance data */ - orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, - ORTE_RML_TAG_ALLGATHER_RCD, - ORTE_RML_PERSISTENT, - rcd_allgather_recv_dist, NULL); - return OPAL_SUCCESS; -} - -/** - * Finalize the module - */ -static void finalize(void) -{ - /* cancel the recv */ - orte_rml.recv_cancel(ORTE_NAME_WILDCARD, ORTE_RML_TAG_ALLGATHER_RCD); -} - -static int allgather(orte_grpcomm_coll_t *coll, - opal_buffer_t *sendbuf) -{ - uint32_t log2ndmns; - - /* check the number of involved daemons - if it is not a power of two, - * then we cannot do it */ - if (0 == ((coll->ndmns != 0) && !(coll->ndmns & (coll->ndmns - 1)))) { - return ORTE_ERR_TAKE_NEXT_OPTION; - } - - log2ndmns = log2 (coll->ndmns); - - OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output, - "%s 
grpcomm:coll:recdub algo employed for %d daemons", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (int)coll->ndmns)); - - /* mark local data received */ - if (log2ndmns) { - opal_bitmap_init (&coll->distance_mask_recv, log2ndmns); - } - - /* get my own rank */ - coll->my_rank = ORTE_VPID_INVALID; - for (orte_vpid_t nv = 0 ; nv < coll->ndmns ; ++nv) { - if (coll->dmns[nv] == ORTE_PROC_MY_NAME->vpid) { - coll->my_rank = nv; - break; - } - } - - /* check for bozo case */ - if (ORTE_VPID_INVALID == coll->my_rank) { - OPAL_OUTPUT((orte_grpcomm_base_framework.framework_output, - "My peer not found in daemons array")); - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - rcd_finalize_coll(coll, ORTE_ERR_NOT_FOUND); - return ORTE_ERR_NOT_FOUND; - } - - /* start by seeding the collection with our own data */ - opal_dss.copy_payload(&coll->bucket, sendbuf); - - coll->nreported = 1; - - /* process data */ - rcd_allgather_process_data (coll, 0); - - return ORTE_SUCCESS; -} - -static int rcd_allgather_send_dist(orte_grpcomm_coll_t *coll, orte_process_name_t *peer, uint32_t distance) { - opal_buffer_t *send_buf; - int rc; - - send_buf = OBJ_NEW(opal_buffer_t); - - /* pack the signature */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(send_buf, &coll->sig, 1, ORTE_SIGNATURE))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(send_buf); - return rc; - } - /* pack the distance */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(send_buf, &distance, 1, OPAL_UINT32))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(send_buf); - return rc; - } - /* pack the data */ - if (OPAL_SUCCESS != (rc = opal_dss.copy_payload(send_buf, &coll->bucket))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(send_buf); - return rc; - } - - OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output, - "%s grpcomm:coll:recdub SENDING TO %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(peer))); - - if (0 > (rc = orte_rml.send_buffer_nb(orte_coll_conduit, - peer, send_buf, - ORTE_RML_TAG_ALLGATHER_RCD, - orte_rml_send_callback, NULL))) { - 
ORTE_ERROR_LOG(rc); - OBJ_RELEASE(send_buf); - return rc; - }; - return ORTE_SUCCESS; -} - -static void rcd_allgather_process_data(orte_grpcomm_coll_t *coll, uint32_t distance) { - /* Communication step: - At every step i, rank r: - - exchanges message containing all data collected so far with rank peer = (r ^ 2^i). - */ - uint32_t log2ndmns = log2(coll->ndmns); - orte_process_name_t peer; - orte_vpid_t nv; - int rc; - - peer.jobid = ORTE_PROC_MY_NAME->jobid; - - while (distance < log2ndmns) { - OPAL_OUTPUT_VERBOSE((80, orte_grpcomm_base_framework.framework_output, - "%s grpcomm:coll:recdub process distance %u", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), distance)); - - /* first send my current contents */ - nv = coll->my_rank ^ (1 << distance); - assert (nv < coll->ndmns); - peer.vpid = coll->dmns[nv]; - - rcd_allgather_send_dist(coll, &peer, distance); - - /* check whether data for next distance is available */ - if (NULL == coll->buffers || NULL == coll->buffers[distance]) { - break; - } - - OPAL_OUTPUT_VERBOSE((80, orte_grpcomm_base_framework.framework_output, - "%s grpcomm:coll:recdub %u distance data found", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), distance)); - if (OPAL_SUCCESS != (rc = opal_dss.copy_payload(&coll->bucket, coll->buffers[distance]))) { - ORTE_ERROR_LOG(rc); - rcd_finalize_coll(coll, rc); - return; - } - coll->nreported += 1 << distance; - orte_grpcomm_base_mark_distance_recv(coll, distance); - OBJ_RELEASE(coll->buffers[distance]); - coll->buffers[distance] = NULL; - ++distance; - } - - OPAL_OUTPUT_VERBOSE((80, orte_grpcomm_base_framework.framework_output, - "%s grpcomm:coll:recdub reported %lu process from %lu", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (unsigned long)coll->nreported, - (unsigned long)coll->ndmns)); - - /* if we are done, then complete things */ - if (coll->nreported == coll->ndmns) { - rcd_finalize_coll(coll, ORTE_SUCCESS); - } -} - -static void rcd_allgather_recv_dist(int status, orte_process_name_t* sender, - opal_buffer_t* buffer, 
orte_rml_tag_t tag, - void* cbdata) -{ - int32_t cnt; - uint32_t distance; - int rc; - orte_grpcomm_signature_t *sig; - orte_grpcomm_coll_t *coll; - - OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output, - "%s grpcomm:coll:recdub RECEIVING FROM %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(sender))); - - /* unpack the signature */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &sig, &cnt, ORTE_SIGNATURE))) { - ORTE_ERROR_LOG(rc); - return; - } - - /* check for the tracker and create it if not found */ - if (NULL == (coll = orte_grpcomm_base_get_tracker(sig, true))) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - OBJ_RELEASE(sig); - return; - } - /* unpack the distance */ - distance = -1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &distance, &cnt, OPAL_UINT32))) { - OBJ_RELEASE(sig); - ORTE_ERROR_LOG(rc); - rcd_finalize_coll(coll, rc); - return; - } - assert(distance >= 0 && 0 == orte_grpcomm_base_check_distance_recv(coll, distance)); - - /* Check whether we can process next distance */ - if (coll->nreported && (!distance || orte_grpcomm_base_check_distance_recv(coll, (distance - 1)))) { - OPAL_OUTPUT_VERBOSE((80, orte_grpcomm_base_framework.framework_output, - "%s grpcomm:coll:recdub data from %d distance received, " - "Process the next distance.", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), distance)); - /* capture any provided content */ - if (OPAL_SUCCESS != (rc = opal_dss.copy_payload(&coll->bucket, buffer))) { - OBJ_RELEASE(sig); - ORTE_ERROR_LOG(rc); - rcd_finalize_coll(coll, rc); - return; - } - coll->nreported += (1 << distance); - orte_grpcomm_base_mark_distance_recv (coll, distance); - rcd_allgather_process_data (coll, distance + 1); - } else { - OPAL_OUTPUT_VERBOSE((80, orte_grpcomm_base_framework.framework_output, - "%s grpcomm:coll:recdub data from %d distance received, " - "still waiting for data.", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), distance)); - if (NULL == coll->buffers) { - coll->buffers = 
(opal_buffer_t **) calloc (log2 (coll->ndmns), sizeof (coll->buffers[0])); - if (NULL == coll->buffers) { - OBJ_RELEASE(sig); - ORTE_ERROR_LOG(OPAL_ERR_OUT_OF_RESOURCE); - rcd_finalize_coll(coll, OPAL_ERR_OUT_OF_RESOURCE); - return; - } - } - if (NULL == (coll->buffers[distance] = OBJ_NEW(opal_buffer_t))) { - OBJ_RELEASE(sig); - ORTE_ERROR_LOG(OPAL_ERR_OUT_OF_RESOURCE); - rcd_finalize_coll(coll, OPAL_ERR_OUT_OF_RESOURCE); - return; - } - if (OPAL_SUCCESS != (rc = opal_dss.copy_payload(coll->buffers[distance], buffer))) { - OBJ_RELEASE(sig); - ORTE_ERROR_LOG(rc); - rcd_finalize_coll(coll, rc); - return; - } - } - - OBJ_RELEASE(sig); -} - -static int rcd_finalize_coll(orte_grpcomm_coll_t *coll, int ret) -{ - OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output, - "%s grpcomm:coll:recdub declared collective complete", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - - /* execute the callback */ - if (NULL != coll->cbfunc) { - coll->cbfunc(ret, &coll->bucket, coll->cbdata); - } - - opal_list_remove_item(&orte_grpcomm_base.ongoing, &coll->super); - - OBJ_RELEASE(coll); - - return ORTE_SUCCESS; -} diff --git a/orte/mca/grpcomm/rcd/grpcomm_rcd.h b/orte/mca/grpcomm/rcd/grpcomm_rcd.h deleted file mode 100644 index 98da4d99a0..0000000000 --- a/orte/mca/grpcomm/rcd/grpcomm_rcd.h +++ /dev/null @@ -1,31 +0,0 @@ -/* -*- C -*- - * - * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - */ -#ifndef GRPCOMM_RCD_H -#define GRPCOMM_RCD_H - -#include "orte_config.h" - - -#include "orte/mca/grpcomm/grpcomm.h" - -BEGIN_C_DECLS - -/* - * Grpcomm interfaces - */ - -ORTE_MODULE_DECLSPEC extern orte_grpcomm_base_component_t mca_grpcomm_rcd_component; -extern orte_grpcomm_base_module_t orte_grpcomm_rcd_module; - -END_C_DECLS - -#endif diff --git a/orte/mca/grpcomm/rcd/grpcomm_rcd_component.c b/orte/mca/grpcomm/rcd/grpcomm_rcd_component.c deleted file mode 100644 index b71557877d..0000000000 --- a/orte/mca/grpcomm/rcd/grpcomm_rcd_component.c +++ /dev/null @@ -1,84 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" -#include "orte/constants.h" - -#include "orte/mca/mca.h" -#include "opal/runtime/opal_params.h" - -#include "orte/util/proc_info.h" - -#include "grpcomm_rcd.h" - -static int my_priority=5; -static int rcd_open(void); -static int rcd_close(void); -static int rcd_query(mca_base_module_t **module, int *priority); -static int rcd_register(void); - -/* - * Struct of function pointers that need to be initialized - */ -orte_grpcomm_base_component_t mca_grpcomm_rcd_component = { - .base_version = { - ORTE_GRPCOMM_BASE_VERSION_3_0_0, - - .mca_component_name = "rcd", - MCA_BASE_MAKE_VERSION(component, ORTE_MAJOR_VERSION, ORTE_MINOR_VERSION, - ORTE_RELEASE_VERSION), - .mca_open_component = rcd_open, - .mca_close_component = rcd_close, - .mca_query_component = rcd_query, - .mca_register_component_params = rcd_register, - }, - .base_data = { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, -}; - -static int 
rcd_register(void) -{ - mca_base_component_t *c = &mca_grpcomm_rcd_component.base_version; - - /* make the priority adjustable so users can select - * rcd for use by apps without affecting daemons - */ - my_priority = 80; - (void) mca_base_component_var_register(c, "priority", - "Priority of the grpcomm rcd component", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &my_priority); - return ORTE_SUCCESS; -} - -/* Open the component */ -static int rcd_open(void) -{ - return ORTE_SUCCESS; -} - -static int rcd_close(void) -{ - return ORTE_SUCCESS; -} - -static int rcd_query(mca_base_module_t **module, int *priority) -{ - *priority = my_priority; - *module = (mca_base_module_t *)&orte_grpcomm_rcd_module; - return ORTE_SUCCESS; -} diff --git a/orte/mca/grpcomm/rcd/owner.txt b/orte/mca/grpcomm/rcd/owner.txt deleted file mode 100644 index 4ad6f408ca..0000000000 --- a/orte/mca/grpcomm/rcd/owner.txt +++ /dev/null @@ -1,7 +0,0 @@ -# -# owner/status file -# owner: institution that is responsible for this package -# status: e.g. active, maintenance, unmaintained -# -owner: INTEL -status: maintenance diff --git a/orte/mca/odls/base/odls_base_default_fns.c b/orte/mca/odls/base/odls_base_default_fns.c index b500e0b380..0bbf09f53d 100644 --- a/orte/mca/odls/base/odls_base_default_fns.c +++ b/orte/mca/odls/base/odls_base_default_fns.c @@ -14,7 +14,7 @@ * Copyright (c) 2011-2013 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2011-2018 Cisco Systems, Inc. All rights reserved - * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2014-2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2017 Mellanox Technologies Ltd. All rights reserved. 
@@ -67,7 +67,6 @@ #include "orte/mca/ess/base/base.h" #include "orte/mca/grpcomm/base/base.h" #include "orte/mca/plm/base/base.h" -#include "orte/mca/regx/regx.h" #include "orte/mca/rml/base/rml_contact.h" #include "orte/mca/rmaps/rmaps_types.h" #include "orte/mca/rmaps/base/base.h" @@ -79,6 +78,7 @@ #include "orte/util/context_fns.h" #include "orte/util/name_fns.h" +#include "orte/util/nidmap.h" #include "orte/util/session_dir.h" #include "orte/util/proc_info.h" #include "orte/util/show_help.h" @@ -148,7 +148,6 @@ int orte_odls_base_default_get_add_procs_data(opal_buffer_t *buffer, int8_t flag; void *nptr; uint32_t key; - char *nidmap; orte_proc_t *dmn, *proc; opal_value_t *val = NULL, *kv; opal_list_t *modex, ilist; @@ -167,33 +166,21 @@ int orte_odls_base_default_get_add_procs_data(opal_buffer_t *buffer, return ORTE_SUCCESS; } - /* if we couldn't provide the allocation regex on the orted - * cmd line, then we need to provide all the info here */ - if (!orte_nidmap_communicated) { - if (ORTE_SUCCESS != (rc = orte_regx.nidmap_create(orte_node_pool, &nidmap))) { - ORTE_ERROR_LOG(rc); - return rc; - } - orte_nidmap_communicated = true; - } else { - nidmap = NULL; - } - opal_dss.pack(buffer, &nidmap, 1, OPAL_STRING); - if (NULL != nidmap) { - free(nidmap); - } - - /* if we haven't already done so, provide the info on the - * capabilities of each node */ + /* provide the nidmap - i.e., the map of hostnames + * and the vpid of the daemon running on each node. 
+ * In a DVM, we should only have to do this once */ if (1 < orte_process_info.num_procs && (!orte_node_info_communicated || orte_get_attribute(&jdata->attributes, ORTE_JOB_LAUNCHED_DAEMONS, NULL, OPAL_BOOL))) { + /* mark that we did include this info */ flag = 1; opal_dss.pack(buffer, &flag, 1, OPAL_INT8); - if (ORTE_SUCCESS != (rc = orte_regx.encode_nodemap(buffer))) { + /* load the nidmap */ + if (ORTE_SUCCESS != (rc = orte_util_nidmap_create(orte_node_pool, buffer))) { ORTE_ERROR_LOG(rc); return rc; } + /* get wireup info for daemons */ if (NULL == (jptr = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid))) { ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); @@ -227,104 +214,100 @@ int orte_odls_base_default_get_add_procs_data(opal_buffer_t *buffer, ORTE_ERROR_LOG(rc); OBJ_RELEASE(wireup); return rc; - } else { - /* the data is returned as a list of key-value pairs in the opal_value_t */ - if (OPAL_PTR != val->type) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - OBJ_RELEASE(wireup); - return ORTE_ERR_NOT_FOUND; - } - if (ORTE_SUCCESS != (rc = opal_dss.pack(wireup, ORTE_PROC_MY_NAME, 1, ORTE_NAME))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(wireup); - return rc; - } - modex = (opal_list_t*)val->data.ptr; - numbytes = (int32_t)opal_list_get_size(modex); - if (ORTE_SUCCESS != (rc = opal_dss.pack(wireup, &numbytes, 1, OPAL_INT32))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(wireup); - return rc; - } - OPAL_LIST_FOREACH(kv, modex, opal_value_t) { - if (ORTE_SUCCESS != (rc = opal_dss.pack(wireup, &kv, 1, OPAL_VALUE))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(wireup); - return rc; - } - } - OPAL_LIST_RELEASE(modex); - OBJ_RELEASE(val); } - } - /* if we didn't rollup the connection info, then we have - * to provide a complete map of connection info */ - if (!orte_static_ports && !orte_fwd_mpirun_port) { - for (v=1; v < jptr->procs->size; v++) { - if (NULL == (dmn = (orte_proc_t*)opal_pointer_array_get_item(jptr->procs, v))) { - continue; + /* the data is returned as a list of key-value pairs in the 
opal_value_t */ + if (OPAL_PTR != val->type) { + ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); + OBJ_RELEASE(wireup); + return ORTE_ERR_NOT_FOUND; + } + if (ORTE_SUCCESS != (rc = opal_dss.pack(wireup, ORTE_PROC_MY_NAME, 1, ORTE_NAME))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(wireup); + return rc; + } + modex = (opal_list_t*)val->data.ptr; + numbytes = (int32_t)opal_list_get_size(modex); + if (ORTE_SUCCESS != (rc = opal_dss.pack(wireup, &numbytes, 1, OPAL_INT32))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(wireup); + return rc; + } + OPAL_LIST_FOREACH(kv, modex, opal_value_t) { + if (ORTE_SUCCESS != (rc = opal_dss.pack(wireup, &kv, 1, OPAL_VALUE))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(wireup); + return rc; } - val = NULL; - if (opal_pmix.legacy_get()) { - if (OPAL_SUCCESS != (rc = opal_pmix.get(&dmn->name, OPAL_PMIX_PROC_URI, NULL, &val)) || NULL == val) { + } + OPAL_LIST_RELEASE(modex); + OBJ_RELEASE(val); + } + /* provide a complete map of connection info */ + for (v=1; v < jptr->procs->size; v++) { + if (NULL == (dmn = (orte_proc_t*)opal_pointer_array_get_item(jptr->procs, v))) { + continue; + } + val = NULL; + if (opal_pmix.legacy_get()) { + if (OPAL_SUCCESS != (rc = opal_pmix.get(&dmn->name, OPAL_PMIX_PROC_URI, NULL, &val)) || NULL == val) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(buffer); + OBJ_RELEASE(wireup); + return rc; + } else { + /* pack the name of the daemon */ + if (ORTE_SUCCESS != (rc = opal_dss.pack(wireup, &dmn->name, 1, ORTE_NAME))) { ORTE_ERROR_LOG(rc); OBJ_RELEASE(buffer); OBJ_RELEASE(wireup); return rc; - } else { - /* pack the name of the daemon */ - if (ORTE_SUCCESS != (rc = opal_dss.pack(wireup, &dmn->name, 1, ORTE_NAME))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buffer); - OBJ_RELEASE(wireup); - return rc; - } - /* pack the URI */ - if (ORTE_SUCCESS != (rc = opal_dss.pack(wireup, &val->data.string, 1, OPAL_STRING))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buffer); - OBJ_RELEASE(wireup); - return rc; - } - OBJ_RELEASE(val); } - } else { - if (OPAL_SUCCESS != (rc = 
opal_pmix.get(&dmn->name, NULL, NULL, &val)) || NULL == val) { + /* pack the URI */ + if (ORTE_SUCCESS != (rc = opal_dss.pack(wireup, &val->data.string, 1, OPAL_STRING))) { ORTE_ERROR_LOG(rc); OBJ_RELEASE(buffer); + OBJ_RELEASE(wireup); return rc; - } else { - /* the data is returned as a list of key-value pairs in the opal_value_t */ - if (OPAL_PTR != val->type) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - OBJ_RELEASE(buffer); - return ORTE_ERR_NOT_FOUND; - } - if (ORTE_SUCCESS != (rc = opal_dss.pack(wireup, &dmn->name, 1, ORTE_NAME))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buffer); - OBJ_RELEASE(wireup); - return rc; - } - modex = (opal_list_t*)val->data.ptr; - numbytes = (int32_t)opal_list_get_size(modex); - if (ORTE_SUCCESS != (rc = opal_dss.pack(wireup, &numbytes, 1, OPAL_INT32))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buffer); - OBJ_RELEASE(wireup); - return rc; - } - OPAL_LIST_FOREACH(kv, modex, opal_value_t) { - if (ORTE_SUCCESS != (rc = opal_dss.pack(wireup, &kv, 1, OPAL_VALUE))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buffer); - OBJ_RELEASE(wireup); - return rc; - } - } - OPAL_LIST_RELEASE(modex); - OBJ_RELEASE(val); } + OBJ_RELEASE(val); + } + } else { + if (OPAL_SUCCESS != (rc = opal_pmix.get(&dmn->name, NULL, NULL, &val)) || NULL == val) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(buffer); + return rc; + } else { + /* the data is returned as a list of key-value pairs in the opal_value_t */ + if (OPAL_PTR != val->type) { + ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); + OBJ_RELEASE(buffer); + return ORTE_ERR_NOT_FOUND; + } + if (ORTE_SUCCESS != (rc = opal_dss.pack(wireup, &dmn->name, 1, ORTE_NAME))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(buffer); + OBJ_RELEASE(wireup); + return rc; + } + modex = (opal_list_t*)val->data.ptr; + numbytes = (int32_t)opal_list_get_size(modex); + if (ORTE_SUCCESS != (rc = opal_dss.pack(wireup, &numbytes, 1, OPAL_INT32))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(buffer); + OBJ_RELEASE(wireup); + return rc; + } + OPAL_LIST_FOREACH(kv, modex, 
opal_value_t) { + if (ORTE_SUCCESS != (rc = opal_dss.pack(wireup, &kv, 1, OPAL_VALUE))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(buffer); + OBJ_RELEASE(wireup); + return rc; + } + } + OPAL_LIST_RELEASE(modex); + OBJ_RELEASE(val); } } } @@ -417,17 +400,11 @@ int orte_odls_base_default_get_add_procs_data(opal_buffer_t *buffer, } if (!orte_get_attribute(&jdata->attributes, ORTE_JOB_FULLY_DESCRIBED, NULL, OPAL_BOOL)) { - /* compute and pack the ppn regex */ - if (ORTE_SUCCESS != (rc = orte_regx.generate_ppn(jdata, &nidmap))) { + /* compute and pack the ppn */ + if (ORTE_SUCCESS != (rc = orte_util_generate_ppn(jdata, buffer))) { ORTE_ERROR_LOG(rc); return rc; } - if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &nidmap, 1, OPAL_STRING))) { - ORTE_ERROR_LOG(rc); - free(nidmap); - return rc; - } - free(nidmap); } /* get any application prep info */ @@ -485,7 +462,6 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *buffer, orte_proc_t *pptr, *dmn; orte_app_context_t *app; int8_t flag; - char *ppn; opal_value_t *kv; opal_list_t local_support, cache; opal_pmix_lock_t lock; @@ -623,29 +599,21 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *buffer, * and sent us the complete array of procs in the orte_job_t, so we * don't need to do anything more here */ if (!orte_get_attribute(&jdata->attributes, ORTE_JOB_FULLY_DESCRIBED, NULL, OPAL_BOOL)) { - /* extract the ppn regex */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &ppn, &cnt, OPAL_STRING))) { + /* load the ppn info into the job and node arrays - the + * function will ignore the data on the HNP as it already + * has the info */ + if (ORTE_SUCCESS != (rc = orte_util_decode_ppn(jdata, buffer))) { ORTE_ERROR_LOG(rc); goto REPORT_ERROR; } if (!ORTE_PROC_IS_HNP) { - /* populate the node array of the job map and the proc array of - * the job object so we know how many procs are on each node */ - if (ORTE_SUCCESS != (rc = orte_regx.parse_ppn(jdata, ppn))) { - ORTE_ERROR_LOG(rc); - 
free(ppn); - goto REPORT_ERROR; - } - /* now assign locations to the procs */ + /* assign locations to the procs */ if (ORTE_SUCCESS != (rc = orte_rmaps_base_assign_locations(jdata))) { ORTE_ERROR_LOG(rc); - free(ppn); goto REPORT_ERROR; } } - free(ppn); /* compute the ranks and add the proc objects * to the jdata->procs array */ diff --git a/orte/mca/oob/tcp/oob_tcp_component.c b/orte/mca/oob/tcp/oob_tcp_component.c index d7e1f5f9b2..85a17e01a6 100644 --- a/orte/mca/oob/tcp/oob_tcp_component.c +++ b/orte/mca/oob/tcp/oob_tcp_component.c @@ -14,7 +14,7 @@ * reserved. * Copyright (c) 2009-2015 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2014 NVIDIA Corporation. All rights reserved. * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. @@ -334,11 +334,6 @@ static int tcp_component_register(void) if (NULL != mca_oob_tcp_component.tcp_static_ports || NULL != mca_oob_tcp_component.tcp6_static_ports) { - /* can't fwd mpirun port _and_ have static ports */ - if (ORTE_PROC_IS_HNP && orte_fwd_mpirun_port) { - orte_show_help("help-oob-tcp.txt", "static-fwd", true); - return ORTE_ERR_NOT_AVAILABLE; - } orte_static_ports = true; } diff --git a/orte/mca/plm/base/plm_base_launch_support.c b/orte/mca/plm/base/plm_base_launch_support.c index 413f960827..363c006233 100644 --- a/orte/mca/plm/base/plm_base_launch_support.c +++ b/orte/mca/plm/base/plm_base_launch_support.c @@ -13,7 +13,7 @@ * Copyright (c) 2009 Institut National de Recherche en Informatique * et Automatique. All rights reserved. * Copyright (c) 2011-2012 Los Alamos National Security, LLC. - * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. 
* Copyright (c) 2014-2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016 IBM Corporation. All rights reserved. @@ -44,8 +44,10 @@ #include "opal/dss/dss.h" #include "opal/mca/hwloc/hwloc-internal.h" #include "opal/mca/pmix/pmix.h" +#include "opal/mca/compress/compress.h" #include "orte/util/dash_host/dash_host.h" +#include "orte/util/nidmap.h" #include "orte/util/session_dir.h" #include "orte/util/show_help.h" #include "orte/mca/errmgr/errmgr.h" @@ -53,7 +55,6 @@ #include "orte/mca/iof/base/base.h" #include "orte/mca/odls/base/base.h" #include "orte/mca/ras/base/base.h" -#include "orte/mca/regx/regx.h" #include "orte/mca/rmaps/rmaps.h" #include "orte/mca/rmaps/base/base.h" #include "orte/mca/rml/rml.h" @@ -72,7 +73,6 @@ #include "orte/runtime/runtime.h" #include "orte/runtime/orte_locks.h" #include "orte/runtime/orte_quit.h" -#include "orte/util/compress.h" #include "orte/util/name_fns.h" #include "orte/util/pre_condition_transports.h" #include "orte/util/proc_info.h" @@ -580,7 +580,7 @@ void orte_plm_base_send_launch_msg(int fd, short args, void *cbdata) uint8_t *cmpdata; size_t cmplen; /* report the size of the launch message */ - compressed = orte_util_compress_block((uint8_t*)jdata->launch_msg.base_ptr, + compressed = opal_compress.compress_block((uint8_t*)jdata->launch_msg.base_ptr, jdata->launch_msg.bytes_used, &cmpdata, &cmplen); if (compressed) { @@ -857,7 +857,7 @@ void orte_plm_base_daemon_topology(int status, orte_process_name_t* sender, goto CLEANUP; } /* decompress the data */ - if (orte_util_uncompress_block(&cmpdata, cmplen, + if (opal_compress.decompress_block(&cmpdata, cmplen, packed_data, inlen)) { /* the data has been uncompressed */ opal_dss.load(&datbuf, cmpdata, cmplen); @@ -1184,7 +1184,7 @@ void orte_plm_base_daemon_callback(int status, orte_process_name_t* sender, goto CLEANUP; } /* decompress the data */ - if (orte_util_uncompress_block(&cmpdata, cmplen, + if 
(opal_compress.decompress_block(&cmpdata, cmplen, packed_data, inlen)) { /* the data has been uncompressed */ opal_dss.load(&datbuf, cmpdata, cmplen); @@ -1515,46 +1515,6 @@ int orte_plm_base_orted_append_basic_args(int *argc, char ***argv, opal_argv_append(argc, argv, param); free(param); - /* convert the nodes with daemons to a regex */ - param = NULL; - if (ORTE_SUCCESS != (rc = orte_regx.nidmap_create(orte_node_pool, &param))) { - ORTE_ERROR_LOG(rc); - return rc; - } - if (NULL != orte_node_regex) { - free(orte_node_regex); - } - orte_node_regex = param; - /* if this is too long, then we'll have to do it with - * a phone home operation instead */ - if (strlen(param) < orte_plm_globals.node_regex_threshold) { - opal_argv_append(argc, argv, "-"OPAL_MCA_CMD_LINE_ID); - opal_argv_append(argc, argv, "orte_node_regex"); - opal_argv_append(argc, argv, orte_node_regex); - /* mark that the nidmap has been communicated */ - orte_nidmap_communicated = true; - } - - if (!orte_static_ports && !orte_fwd_mpirun_port) { - /* if we are using static ports, or we are forwarding - * mpirun's port, then we would have built all the - * connection info and so there is nothing to be passed. 
- * Otherwise, we have to pass the HNP uri so we can - * phone home */ - opal_argv_append(argc, argv, "-"OPAL_MCA_CMD_LINE_ID); - opal_argv_append(argc, argv, "orte_hnp_uri"); - opal_argv_append(argc, argv, orte_process_info.my_hnp_uri); - } - - /* if requested, pass our port */ - if (orte_fwd_mpirun_port) { - opal_asprintf(&param, "%d", orte_process_info.my_port); - opal_argv_append(argc, argv, "-"OPAL_MCA_CMD_LINE_ID); - opal_argv_append(argc, argv, "oob_tcp_static_ipv4_ports"); - opal_argv_append(argc, argv, param); - free(param); - } - /* if --xterm was specified, pass that along */ if (NULL != orte_xterm) { opal_argv_append(argc, argv, "-"OPAL_MCA_CMD_LINE_ID); @@ -2136,7 +2096,7 @@ int orte_plm_base_setup_virtual_machine(orte_job_t *jdata) opal_list_remove_item(&nodes, item); OBJ_RELEASE(item); } else { - /* The filtering logic sets this flag only for nodes which + /* The filtering logic sets this flag only for nodes which * are kept after filtering. This flag will be subsequently * used in rmaps components and must be reset here */ ORTE_FLAG_UNSET(node, ORTE_NODE_FLAG_MAPPED); diff --git a/orte/mca/regx/Makefile.am b/orte/mca/regx/Makefile.am deleted file mode 100644 index 8248d9f464..0000000000 --- a/orte/mca/regx/Makefile.am +++ /dev/null @@ -1,30 +0,0 @@ -# -# Copyright (c) 2015-2018 Intel, Inc. All rights reserved. 
-# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# main library setup -noinst_LTLIBRARIES = libmca_regx.la -libmca_regx_la_SOURCES = - -# pkgdata setup -dist_ortedata_DATA = - -# local files -headers = regx.h -libmca_regx_la_SOURCES += $(headers) - -# Conditionally install the header files -if WANT_INSTALL_HEADERS -ortedir = $(orteincludedir)/$(subdir) -nobase_orte_HEADERS = $(headers) -endif - -include base/Makefile.am - -distclean-local: - rm -f base/static-components.h diff --git a/orte/mca/regx/base/Makefile.am b/orte/mca/regx/base/Makefile.am deleted file mode 100644 index cee4dd7ceb..0000000000 --- a/orte/mca/regx/base/Makefile.am +++ /dev/null @@ -1,18 +0,0 @@ -# -# Copyright (c) 2015-2018 Intel, Inc. All rights reserved. -# Copyright (c) 2018 Research Organization for Information Science -# and Technology (RIST). All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -headers += \ - base/base.h - -libmca_regx_la_SOURCES += \ - base/regx_base_default_fns.c \ - base/regx_base_frame.c \ - base/regx_base_select.c diff --git a/orte/mca/regx/base/base.h b/orte/mca/regx/base/base.h deleted file mode 100644 index a1d34e67c7..0000000000 --- a/orte/mca/regx/base/base.h +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (c) 2015-2018 Intel, Inc. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * regx framework base functionality. 
- */ - -#ifndef ORTE_MCA_REGX_BASE_H -#define ORTE_MCA_REGX_BASE_H - -/* - * includes - */ -#include "orte_config.h" -#include "orte/types.h" - -#include "opal/class/opal_list.h" -#include "orte/mca/mca.h" - -#include "orte/runtime/orte_globals.h" - -#include "orte/mca/regx/regx.h" - -BEGIN_C_DECLS - -/* - * MCA Framework - */ -ORTE_DECLSPEC extern mca_base_framework_t orte_regx_base_framework; -/* select all components */ -ORTE_DECLSPEC int orte_regx_base_select(void); - -/* - * common stuff - */ -typedef struct { - opal_list_item_t super; - int vpid; - int cnt; - int slots; - orte_topology_t *t; -} orte_regex_range_t; - -OBJ_CLASS_DECLARATION(orte_regex_range_t); - -typedef struct { - /* list object */ - opal_list_item_t super; - char *prefix; - char *suffix; - int num_digits; - opal_list_t ranges; -} orte_regex_node_t; -END_C_DECLS - -OBJ_CLASS_DECLARATION(orte_regex_node_t); - -ORTE_DECLSPEC extern int orte_regx_base_nidmap_parse(char *regex); - -ORTE_DECLSPEC extern int orte_regx_base_encode_nodemap(opal_buffer_t *buffer); - -ORTE_DECLSPEC int orte_regx_base_decode_daemon_nodemap(opal_buffer_t *buffer); - -ORTE_DECLSPEC int orte_regx_base_generate_ppn(orte_job_t *jdata, char **ppn); - -ORTE_DECLSPEC int orte_regx_base_parse_ppn(orte_job_t *jdata, char *regex); - -ORTE_DECLSPEC int orte_regx_base_extract_node_names(char *regexp, char ***names); -#endif diff --git a/orte/mca/regx/base/owner.txt b/orte/mca/regx/base/owner.txt deleted file mode 100644 index 85b4416d20..0000000000 --- a/orte/mca/regx/base/owner.txt +++ /dev/null @@ -1,7 +0,0 @@ -# -# owner/status file -# owner: institution that is responsible for this package -# status: e.g. 
active, maintenance, unmaintained -# -owner: INTEL -status: active diff --git a/orte/mca/regx/base/regx_base_default_fns.c b/orte/mca/regx/base/regx_base_default_fns.c deleted file mode 100644 index 54684a01e2..0000000000 --- a/orte/mca/regx/base/regx_base_default_fns.c +++ /dev/null @@ -1,1282 +0,0 @@ -/* - * Copyright (c) 2016-2018 Intel, Inc. All rights reserved. - * Copyright (c) 2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - */ - -#include "orte_config.h" -#include "orte/types.h" -#include "opal/types.h" - -#ifdef HAVE_UNISTD_H -#include <unistd.h> -#endif -#include <ctype.h> - -#include "opal/util/argv.h" -#include "opal/util/basename.h" -#include "opal/util/opal_environ.h" -#include "opal/util/printf.h" - -#include "orte/runtime/orte_globals.h" -#include "orte/util/name_fns.h" -#include "orte/util/show_help.h" -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/rmaps/base/base.h" -#include "orte/mca/routed/routed.h" -#include "orte/mca/regx/base/base.h" - -static void range_construct(orte_regex_range_t *ptr) -{ - ptr->vpid = 0; - ptr->cnt = 0; -} -OBJ_CLASS_INSTANCE(orte_regex_range_t, - opal_list_item_t, - range_construct, NULL); - -static void orte_regex_node_construct(orte_regex_node_t *ptr) -{ - ptr->prefix = NULL; - ptr->suffix = NULL; - ptr->num_digits = 0; - OBJ_CONSTRUCT(&ptr->ranges, opal_list_t); -} - -static void orte_regex_node_destruct(orte_regex_node_t *ptr) -{ - opal_list_item_t *item; - - if (NULL != ptr->prefix) { - free(ptr->prefix); - } - if (NULL != ptr->suffix) { - free(ptr->suffix); - } - - while (NULL != (item = opal_list_remove_first(&ptr->ranges))) { - OBJ_RELEASE(item); - } - OBJ_DESTRUCT(&ptr->ranges); -} - -OBJ_CLASS_INSTANCE(orte_regex_node_t, - opal_list_item_t, - orte_regex_node_construct, - orte_regex_node_destruct); - -int orte_regx_base_nidmap_parse(char *regex) -{ - char *nodelist, *vpids, *ptr; - char 
**nodes, **dvpids; - int rc, n, cnt; - orte_regex_range_t *rng; - opal_list_t dids; - orte_job_t *daemons; - orte_node_t *nd; - orte_proc_t *proc; - - /* if we are the HNP, we don't need to parse this */ - if (ORTE_PROC_IS_HNP) { - return ORTE_SUCCESS; - } - - /* split the regex into its node and vpid parts */ - nodelist = regex; - vpids = strchr(regex, '@'); - if (NULL == vpids) { - /* indicates the regex got mangled somewhere */ - return ORTE_ERR_BAD_PARAM; - } - *vpids = '\0'; // terminate the nodelist string - ++vpids; // step over the separator - if (NULL == vpids || '\0' == *vpids) { - /* indicates the regex got mangled somewhere */ - return ORTE_ERR_BAD_PARAM; - } - - /* decompress the nodes regex */ - nodes = NULL; - if (ORTE_SUCCESS != (rc = orte_regx.extract_node_names(nodelist, &nodes))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - if (NULL == nodes) { - /* should not happen */ - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - return ORTE_ERR_NOT_FOUND; - } - - /* decompress the vpids */ - OBJ_CONSTRUCT(&dids, opal_list_t); - dvpids = opal_argv_split(vpids, ','); - for (n=0; NULL != dvpids[n]; n++) { - rng = OBJ_NEW(orte_regex_range_t); - opal_list_append(&dids, &rng->super); - /* check for a count */ - if (NULL != (ptr = strchr(dvpids[n], '('))) { - dvpids[n][strlen(dvpids[n])-1] = '\0'; // remove trailing paren - *ptr = '\0'; - ++ptr; - rng->cnt = strtoul(ptr, NULL, 10); - } else { - rng->cnt = 1; - } - /* convert the number */ - rng->vpid = strtoul(dvpids[n], NULL, 10); - } - opal_argv_free(dvpids); - - /* get the daemon job object */ - daemons = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid); - - /* create the node pool array - this will include - * _all_ nodes known to the allocation */ - rng = (orte_regex_range_t*)opal_list_get_first(&dids); - cnt = 0; - for (n=0; NULL != nodes[n]; n++) { - nd = OBJ_NEW(orte_node_t); - nd->name = nodes[n]; - opal_pointer_array_set_item(orte_node_pool, n, nd); - /* see if it has a daemon on it */ - if (-1 != rng->vpid) { 
- /* we have a daemon, so let's create the tracker for it */ - if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(daemons->procs, rng->vpid+cnt))) { - proc = OBJ_NEW(orte_proc_t); - proc->name.jobid = ORTE_PROC_MY_NAME->jobid; - proc->name.vpid = rng->vpid + cnt; - proc->state = ORTE_PROC_STATE_RUNNING; - ORTE_FLAG_SET(proc, ORTE_PROC_FLAG_ALIVE); - daemons->num_procs++; - opal_pointer_array_set_item(daemons->procs, proc->name.vpid, proc); - } - nd->index = proc->name.vpid; - OBJ_RETAIN(nd); - proc->node = nd; - OBJ_RETAIN(proc); - nd->daemon = proc; - } - ++cnt; - if (rng->cnt <= cnt) { - rng = (orte_regex_range_t*)opal_list_get_next(&rng->super); - if (NULL == rng) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - return ORTE_ERR_NOT_FOUND; - } - cnt = 0; - } - } - - /* update num procs */ - if (orte_process_info.num_procs != daemons->num_procs) { - orte_process_info.num_procs = daemons->num_procs; - /* need to update the routing plan */ - orte_routed.update_routing_plan(NULL); - } - - if (orte_process_info.max_procs < orte_process_info.num_procs) { - orte_process_info.max_procs = orte_process_info.num_procs; - } - - if (0 < opal_output_get_verbosity(orte_regx_base_framework.framework_output)) { - int i; - for (i=0; i < orte_node_pool->size; i++) { - if (NULL == (nd = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) { - continue; - } - opal_output(0, "%s node[%d].name %s daemon %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), i, - (NULL == nd->name) ? "NULL" : nd->name, - (NULL == nd->daemon) ? 
"NONE" : ORTE_VPID_PRINT(nd->daemon->name.vpid)); - } - } - - return ORTE_SUCCESS; -} - -int orte_regx_base_encode_nodemap(opal_buffer_t *buffer) -{ - int n; - bool test; - orte_regex_range_t *rng, *slt, *tp, *flg; - opal_list_t slots, topos, flags; - opal_list_item_t *item; - char *tmp, *tmp2; - orte_node_t *nptr; - int rc; - uint8_t ui8; - orte_topology_t *ortetopo; - - /* setup the list of results */ - OBJ_CONSTRUCT(&slots, opal_list_t); - OBJ_CONSTRUCT(&topos, opal_list_t); - OBJ_CONSTRUCT(&flags, opal_list_t); - - slt = NULL; - tp = NULL; - flg = NULL; - - /* pack a flag indicating if the HNP was included in the allocation */ - if (orte_hnp_is_allocated) { - ui8 = 1; - } else { - ui8 = 0; - } - if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &ui8, 1, OPAL_UINT8))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* pack a flag indicating if we are in a managed allocation */ - if (orte_managed_allocation) { - ui8 = 1; - } else { - ui8 = 0; - } - if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &ui8, 1, OPAL_UINT8))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* handle the topologies - as the most common case by far - * is to have homogeneous topologies, we only send them - * if something is different. We know that the HNP is - * the first topology, and that any differing topology - * on the compute nodes must follow. 
So send the topologies - * if and only if: - * - * (a) the HNP is being used to house application procs and - * there is more than one topology on our list; or - * - * (b) the HNP is not being used, but there are more than - * two topologies on our list, thus indicating that - * there are multiple topologies on the compute nodes - */ - nptr = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, 0); - if (!orte_hnp_is_allocated || (ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping) & ORTE_MAPPING_NO_USE_LOCAL)) { - /* assign a NULL topology so we still account for our presence, - * but don't cause us to send topology info when not needed */ - tp = OBJ_NEW(orte_regex_range_t); - tp->t = NULL; - tp->cnt = 1; - } else { - /* there is always one topology - our own - so start with it */ - tp = OBJ_NEW(orte_regex_range_t); - tp->t = nptr->topology; - tp->cnt = 1; - } - opal_list_append(&topos, &tp->super); - - opal_output_verbose(5, orte_regx_base_framework.framework_output, - "%s STARTING WITH TOPOLOGY FOR NODE %s: %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - nptr->name, (NULL == tp->t) ? 
"NULL" : tp->t->sig); - - /* likewise, we have slots */ - slt = OBJ_NEW(orte_regex_range_t); - slt->slots = nptr->slots; - slt->cnt = 1; - opal_list_append(&slots, &slt->super); - - /* and flags */ - flg = OBJ_NEW(orte_regex_range_t); - if (ORTE_FLAG_TEST(nptr, ORTE_NODE_FLAG_SLOTS_GIVEN)) { - flg->slots = 1; - } else { - flg->slots = 0; - } - flg->cnt = 1; - opal_list_append(&flags, &flg->super); - - for (n=1; n < orte_node_pool->size; n++) { - if (NULL == (nptr = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, n))) { - continue; - } - /* check the #slots */ - /* is this the next in line */ - if (nptr->slots == slt->slots) { - slt->cnt++; - } else { - /* need to start another range */ - slt = OBJ_NEW(orte_regex_range_t); - slt->slots = nptr->slots; - slt->cnt = 1; - opal_list_append(&slots, &slt->super); - } - /* check the topologies */ - if (NULL != tp->t && NULL == nptr->topology) { - /* we don't know this topology, likely because - * we don't have a daemon on the node */ - tp = OBJ_NEW(orte_regex_range_t); - tp->t = NULL; - tp->cnt = 1; - opal_output_verbose(5, orte_regx_base_framework.framework_output, - "%s ADD TOPOLOGY FOR NODE %s: NULL", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), nptr->name); - opal_list_append(&topos, &tp->super); - } else { - /* is this the next in line */ - if (tp->t == nptr->topology) { - tp->cnt++; - opal_output_verbose(5, orte_regx_base_framework.framework_output, - "%s CONTINUE TOPOLOGY RANGE (%d) WITH NODE %s: %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - tp->cnt, nptr->name, - (NULL == tp->t) ? 
"N/A" : tp->t->sig); - } else { - /* need to start another range */ - tp = OBJ_NEW(orte_regex_range_t); - tp->t = nptr->topology; - tp->cnt = 1; - opal_output_verbose(5, orte_regx_base_framework.framework_output, - "%s STARTING NEW TOPOLOGY RANGE WITH NODE %s: %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - nptr->name, tp->t->sig); - opal_list_append(&topos, &tp->super); - } - } - /* check the flags */ - test = ORTE_FLAG_TEST(nptr, ORTE_NODE_FLAG_SLOTS_GIVEN); - /* is this the next in line */ - if ((test && 1 == flg->slots) || - (!test && 0 == flg->slots)) { - flg->cnt++; - } else { - /* need to start another range */ - flg = OBJ_NEW(orte_regex_range_t); - if (test) { - flg->slots = 1; - } else { - flg->slots = 0; - } - flg->cnt = 1; - opal_list_append(&flags, &flg->super); - } - } - - /* pass #slots on each node */ - tmp = NULL; - while (NULL != (item = opal_list_remove_first(&slots))) { - rng = (orte_regex_range_t*)item; - if (NULL == tmp) { - opal_asprintf(&tmp, "%d[%d]", rng->cnt, rng->slots); - } else { - opal_asprintf(&tmp2, "%s,%d[%d]", tmp, rng->cnt, rng->slots); - free(tmp); - tmp = tmp2; - } - OBJ_RELEASE(rng); - } - OPAL_LIST_DESTRUCT(&slots); - opal_output_verbose(1, orte_regx_base_framework.framework_output, - "%s SLOT ASSIGNMENTS: %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), tmp); - /* pack the string */ - if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &tmp, 1, OPAL_STRING))) { - ORTE_ERROR_LOG(rc); - return rc; - } - if (NULL != tmp) { - free(tmp); - } - - /* do the same to pass the flags for each node */ - tmp = NULL; - while (NULL != (item = opal_list_remove_first(&flags))) { - rng = (orte_regex_range_t*)item; - if (NULL == tmp) { - opal_asprintf(&tmp, "%d[%d]", rng->cnt, rng->slots); - } else { - opal_asprintf(&tmp2, "%s,%d[%d]", tmp, rng->cnt, rng->slots); - free(tmp); - tmp = tmp2; - } - OBJ_RELEASE(rng); - } - OPAL_LIST_DESTRUCT(&flags); - - /* pack the string */ - opal_output_verbose(1, orte_regx_base_framework.framework_output, - "%s FLAG 
ASSIGNMENTS: %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), tmp); - if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &tmp, 1, OPAL_STRING))) { - ORTE_ERROR_LOG(rc); - return rc; - } - if (NULL != tmp) { - free(tmp); - } - - /* don't try to be cute - there aren't going to be that many - * topologies, so just scan the list and see if they are the - * same, excluding any NULL values */ - ortetopo = NULL; - test = false; - OPAL_LIST_FOREACH(rng, &topos, orte_regex_range_t) { - if (NULL == rng->t) { - continue; - } - if (NULL == ortetopo) { - ortetopo = rng->t; - } else if (0 != strcmp(ortetopo->sig, rng->t->sig)) { - /* we have a difference, so send them */ - test = true; - } - } - tmp = NULL; - if (test) { - opal_buffer_t bucket, *bptr; - OBJ_CONSTRUCT(&bucket, opal_buffer_t); - while (NULL != (item = opal_list_remove_first(&topos))) { - rng = (orte_regex_range_t*)item; - opal_output_verbose(5, orte_regx_base_framework.framework_output, - "%s PASSING TOPOLOGY %s RANGE %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - (NULL == rng->t) ? 
"NULL" : rng->t->sig, rng->cnt); - if (NULL == tmp) { - opal_asprintf(&tmp, "%d", rng->cnt); - } else { - opal_asprintf(&tmp2, "%s,%d", tmp, rng->cnt); - free(tmp); - tmp = tmp2; - } - if (NULL == rng->t) { - /* need to account for NULL topology */ - opal_output_verbose(1, orte_regx_base_framework.framework_output, - "%s PACKING NULL TOPOLOGY", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - tmp2 = NULL; - if (ORTE_SUCCESS != (rc = opal_dss.pack(&bucket, &tmp2, 1, OPAL_STRING))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(rng); - OPAL_LIST_DESTRUCT(&topos); - OBJ_DESTRUCT(&bucket); - free(tmp); - return rc; - } - } else { - opal_output_verbose(1, orte_regx_base_framework.framework_output, - "%s PACKING TOPOLOGY: %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), rng->t->sig); - /* pack this topology string */ - if (ORTE_SUCCESS != (rc = opal_dss.pack(&bucket, &rng->t->sig, 1, OPAL_STRING))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(rng); - OPAL_LIST_DESTRUCT(&topos); - OBJ_DESTRUCT(&bucket); - free(tmp); - return rc; - } - /* pack the topology itself */ - if (ORTE_SUCCESS != (rc = opal_dss.pack(&bucket, &rng->t->topo, 1, OPAL_HWLOC_TOPO))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(rng); - OPAL_LIST_DESTRUCT(&topos); - OBJ_DESTRUCT(&bucket); - free(tmp); - return rc; - } - } - OBJ_RELEASE(rng); - } - OPAL_LIST_DESTRUCT(&topos); - /* pack the string */ - opal_output_verbose(1, orte_regx_base_framework.framework_output, - "%s TOPOLOGY ASSIGNMENTS: %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), tmp); - if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &tmp, 1, OPAL_STRING))) { - ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(&bucket); - free(tmp); - return rc; - } - free(tmp); - - /* now pack the topologies */ - bptr = &bucket; - if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &bptr, 1, OPAL_BUFFER))) { - ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(&bucket); - return rc; - } - OBJ_DESTRUCT(&bucket); - } else { - opal_output_verbose(1, orte_regx_base_framework.framework_output, - "%s NOT PASSING TOPOLOGIES", - 
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - /* need to pack the NULL just to terminate the region */ - if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &tmp, 1, OPAL_STRING))) { - ORTE_ERROR_LOG(rc); - return rc; - } - } - - return ORTE_SUCCESS; -} - -int orte_regx_base_decode_daemon_nodemap(opal_buffer_t *buffer) -{ - int n, nn, rc, cnt, offset; - orte_node_t *node; - char *slots=NULL, *topos=NULL, *flags=NULL; - char *rmndr, **tmp; - opal_list_t slts, flgs;; - opal_buffer_t *bptr=NULL; - orte_topology_t *t2; - orte_regex_range_t *rng, *srng, *frng; - uint8_t ui8; - - OBJ_CONSTRUCT(&slts, opal_list_t); - OBJ_CONSTRUCT(&flgs, opal_list_t); - - /* unpack the flag indicating if the HNP was allocated */ - n = 1; - if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &ui8, &n, OPAL_UINT8))) { - ORTE_ERROR_LOG(rc); - goto cleanup; - } - if (0 == ui8) { - orte_hnp_is_allocated = false; - } else { - orte_hnp_is_allocated = true; - } - - /* unpack the flag indicating we are in a managed allocation */ - n = 1; - if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &ui8, &n, OPAL_UINT8))) { - ORTE_ERROR_LOG(rc); - goto cleanup; - } - if (0 == ui8) { - orte_managed_allocation = false; - } else { - orte_managed_allocation = true; - } - - /* unpack the slots regex */ - n = 1; - if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &slots, &n, OPAL_STRING))) { - ORTE_ERROR_LOG(rc); - goto cleanup; - } - /* this is not allowed to be NULL */ - if (NULL == slots) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - rc = ORTE_ERR_BAD_PARAM; - goto cleanup; - } - - /* unpack the flags regex */ - n = 1; - if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &flags, &n, OPAL_STRING))) { - ORTE_ERROR_LOG(rc); - goto cleanup; - } - /* this is not allowed to be NULL */ - if (NULL == flags) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - rc = ORTE_ERR_BAD_PARAM; - goto cleanup; - } - - /* unpack the topos regex - this may not have been - * provided (e.g., for a homogeneous machine) */ - n = 1; - if (ORTE_SUCCESS != (rc = 
opal_dss.unpack(buffer, &topos, &n, OPAL_STRING))) { - ORTE_ERROR_LOG(rc); - goto cleanup; - } - if (NULL != topos) { - /* need to unpack the topologies */ - n = 1; - if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &bptr, &n, OPAL_BUFFER))) { - ORTE_ERROR_LOG(rc); - goto cleanup; - } - } - - /* if we are the HNP, then we just discard these strings as we already - * have a complete picture - but we needed to unpack them in order to - * maintain sync in the unpacking order */ - if (ORTE_PROC_IS_HNP) { - rc = ORTE_SUCCESS; - goto cleanup; - } - - /* decompress the slots */ - tmp = opal_argv_split(slots, ','); - for (n=0; NULL != tmp[n]; n++) { - rng = OBJ_NEW(orte_regex_range_t); - opal_list_append(&slts, &rng->super); - /* find the '[' as that delimits the value */ - rmndr = strchr(tmp[n], '['); - if (NULL == rmndr) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - rc = ORTE_ERR_BAD_PARAM; - opal_argv_free(tmp); - goto cleanup; - } - *rmndr = '\0'; - ++rmndr; - /* convert that number as this is the number of - * slots for this range */ - rng->slots = strtoul(rmndr, NULL, 10); - /* convert the initial number as that is the cnt */ - rng->cnt = strtoul(tmp[n], NULL, 10); - } - opal_argv_free(tmp); - - /* decompress the flags */ - tmp = opal_argv_split(flags, ','); - for (n=0; NULL != tmp[n]; n++) { - rng = OBJ_NEW(orte_regex_range_t); - opal_list_append(&flgs, &rng->super); - /* find the '[' as that delimits the value */ - rmndr = strchr(tmp[n], '['); - if (NULL == rmndr) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - opal_argv_free(tmp); - rc = ORTE_ERR_BAD_PARAM; - goto cleanup; - } - *rmndr = '\0'; - ++rmndr; - /* check the value - it is just one character */ - if ('1' == *rmndr) { - rng->slots = 1; - } else { - rng->slots = 0; - } - /* convert the initial number as that is the cnt */ - rng->cnt = strtoul(tmp[n], NULL, 10); - } - opal_argv_free(tmp); - free(flags); - - /* update the node array */ - srng = (orte_regex_range_t*)opal_list_get_first(&slts); - frng = 
(orte_regex_range_t*)opal_list_get_first(&flgs); - for (n=0; n < orte_node_pool->size; n++) { - if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, n))) { - continue; - } - /* set the number of slots */ - node->slots = srng->slots; - srng->cnt--; - if (0 == srng->cnt) { - srng = (orte_regex_range_t*)opal_list_get_next(&srng->super); - } - /* set the flags */ - if (0 == frng->slots) { - ORTE_FLAG_UNSET(node, ORTE_NODE_FLAG_SLOTS_GIVEN); - } else { - ORTE_FLAG_SET(node, ORTE_NODE_FLAG_SLOTS_GIVEN); - } - frng->cnt--; - if (0 == frng->cnt) { - frng = (orte_regex_range_t*)opal_list_get_next(&frng->super); - } - } - - /* if no topology info was passed, then everyone shares our topology */ - if (NULL == bptr) { - /* our topology is first in the array */ - t2 = (orte_topology_t*)opal_pointer_array_get_item(orte_node_topologies, 0); - opal_output_verbose(1, orte_regx_base_framework.framework_output, - "%s ASSIGNING ALL TOPOLOGIES TO: %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), t2->sig); - for (n=0; n < orte_node_pool->size; n++) { - if (NULL != (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, n))) { - if (NULL == node->topology) { - OBJ_RETAIN(t2); - node->topology = t2; - } - } - } - } else { - char *sig; - hwloc_topology_t topo; - /* decompress the topology regex */ - tmp = opal_argv_split(topos, ','); - /* there must be a topology definition for each range */ - offset = 0; - for (nn=0; NULL != tmp[nn]; nn++) { - cnt = strtoul(tmp[nn], NULL, 10); - /* unpack the signature */ - n = 1; - if (ORTE_SUCCESS != (rc = opal_dss.unpack(bptr, &sig, &n, OPAL_STRING))) { - ORTE_ERROR_LOG(rc); - opal_argv_free(tmp); - OBJ_RELEASE(bptr); - goto cleanup; - } - if (NULL == sig) { - /* the nodes in this range have not reported a topology, - * so skip them */ - offset += cnt; - continue; - } - n = 1; - if (ORTE_SUCCESS != (rc = opal_dss.unpack(bptr, &topo, &n, OPAL_HWLOC_TOPO))) { - ORTE_ERROR_LOG(rc); - opal_argv_free(tmp); - OBJ_RELEASE(bptr); - 
free(sig); - goto cleanup; - } - /* see if we already have this topology - could be an update */ - t2 = NULL; - for (n=0; n < orte_node_topologies->size; n++) { - if (NULL == (t2 = (orte_topology_t*)opal_pointer_array_get_item(orte_node_topologies, n))) { - continue; - } - if (0 == strcmp(t2->sig, sig)) { - /* found a match */ - free(sig); - opal_hwloc_base_free_topology(topo); - sig = NULL; - break; - } - } - if (NULL != sig || NULL == t2) { - /* new topology - record it */ - t2 = OBJ_NEW(orte_topology_t); - t2->sig = sig; - t2->topo = topo; - opal_pointer_array_add(orte_node_topologies, t2); - } - /* point each of the nodes in this range to this topology */ - n=0; - while (n < cnt && (n+offset) < orte_node_pool->size) { - if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, n+offset))) { - continue; - } - opal_output_verbose(1, orte_regx_base_framework.framework_output, - "%s ASSIGNING NODE %s WITH TOPO: %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - node->name, t2->sig); - if (NULL == node->topology) { - OBJ_RETAIN(t2); - node->topology = t2; - } - ++n; - } - offset += cnt; - } - OBJ_RELEASE(bptr); - opal_argv_free(tmp); - } - - cleanup: - OPAL_LIST_DESTRUCT(&slts); - OPAL_LIST_DESTRUCT(&flgs); - return rc; -} - -int orte_regx_base_generate_ppn(orte_job_t *jdata, char **ppn) -{ - orte_nidmap_regex_t *prng, **actives; - opal_list_t *prk; - orte_node_t *nptr; - orte_proc_t *proc; - size_t n; - int *cnt, i, k; - char *tmp2, *ptmp, **cache = NULL; - - /* create an array of lists to handle the number of app_contexts in this job */ - prk = (opal_list_t*)malloc(jdata->num_apps * sizeof(opal_list_t)); - cnt = (int*)malloc(jdata->num_apps * sizeof(int)); - actives = (orte_nidmap_regex_t**)malloc(jdata->num_apps * sizeof(orte_nidmap_regex_t*)); - for (n=0; n < jdata->num_apps; n++) { - OBJ_CONSTRUCT(&prk[n], opal_list_t); - actives[n] = NULL; - } - - /* we provide a complete map in the regex, with an entry for every - * node in the pool */ - for 
(i=0; i < orte_node_pool->size; i++) { - if (NULL == (nptr = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) { - continue; - } - /* if a daemon has been assigned, then count how many procs - * for each app_context from the specified job are assigned to this node */ - memset(cnt, 0, jdata->num_apps * sizeof(int)); - if (NULL != nptr->daemon) { - for (k=0; k < nptr->procs->size; k++) { - if (NULL != (proc = (orte_proc_t*)opal_pointer_array_get_item(nptr->procs, k))) { - if (proc->name.jobid == jdata->jobid) { - ++cnt[proc->app_idx]; - } - } - } - } - /* track the #procs on this node */ - for (n=0; n < jdata->num_apps; n++) { - if (NULL == actives[n]) { - /* just starting */ - actives[n] = OBJ_NEW(orte_nidmap_regex_t); - actives[n]->nprocs = cnt[n]; - actives[n]->cnt = 1; - opal_list_append(&prk[n], &actives[n]->super); - } else { - /* is this the next in line */ - if (cnt[n] == actives[n]->nprocs) { - actives[n]->cnt++; - } else { - /* need to start another range */ - actives[n] = OBJ_NEW(orte_nidmap_regex_t); - actives[n]->nprocs = cnt[n]; - actives[n]->cnt = 1; - opal_list_append(&prk[n], &actives[n]->super); - } - } - } - } - - /* construct the regex from the found ranges for each app_context */ - ptmp = NULL; - for (n=0; n < jdata->num_apps; n++) { - OPAL_LIST_FOREACH(prng, &prk[n], orte_nidmap_regex_t) { - if (1 < prng->cnt) { - if (NULL == ptmp) { - opal_asprintf(&ptmp, "%u(%u)", prng->nprocs, prng->cnt); - } else { - opal_asprintf(&tmp2, "%s,%u(%u)", ptmp, prng->nprocs, prng->cnt); - free(ptmp); - ptmp = tmp2; - } - } else { - if (NULL == ptmp) { - opal_asprintf(&ptmp, "%u", prng->nprocs); - } else { - opal_asprintf(&tmp2, "%s,%u", ptmp, prng->nprocs); - free(ptmp); - ptmp = tmp2; - } - } - } - OPAL_LIST_DESTRUCT(&prk[n]); // releases all the actives objects - if (NULL != ptmp) { - opal_argv_append_nosize(&cache, ptmp); - free(ptmp); - ptmp = NULL; - } - } - free(prk); - free(cnt); - free(actives); - - *ppn = opal_argv_join(cache, '@'); - 
opal_argv_free(cache); - - return ORTE_SUCCESS; -} - -int orte_regx_base_parse_ppn(orte_job_t *jdata, char *regex) -{ - orte_node_t *node; - orte_proc_t *proc; - int n, k, m, cnt; - char **tmp, *ptr, **ppn; - orte_nidmap_regex_t *rng; - opal_list_t trk; - int rc = ORTE_SUCCESS; - - /* split the regex by app_context */ - tmp = opal_argv_split(regex, '@'); - - /* for each app_context, set the ppn */ - for (n=0; NULL != tmp[n]; n++) { - ppn = opal_argv_split(tmp[n], ','); - /* decompress the ppn */ - OBJ_CONSTRUCT(&trk, opal_list_t); - for (m=0; NULL != ppn[m]; m++) { - rng = OBJ_NEW(orte_nidmap_regex_t); - opal_list_append(&trk, &rng->super); - /* check for a count */ - if (NULL != (ptr = strchr(ppn[m], '('))) { - ppn[m][strlen(ppn[m])-1] = '\0'; // remove trailing paren - *ptr = '\0'; - ++ptr; - rng->cnt = strtoul(ptr, NULL, 10); - } else { - rng->cnt = 1; - } - /* convert the number */ - rng->nprocs = strtoul(ppn[m], NULL, 10); - } - opal_argv_free(ppn); - - /* cycle thru our node pool and add the indicated number of procs - * to each node */ - rng = (orte_nidmap_regex_t*)opal_list_get_first(&trk); - cnt = 0; - for (m=0; m < orte_node_pool->size; m++) { - if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, m))) { - continue; - } - /* see if it has any procs for this job and app_context */ - if (0 < rng->nprocs) { - /* add this node to the job map if it isn't already there */ - if (!ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_MAPPED)) { - OBJ_RETAIN(node); - ORTE_FLAG_SET(node, ORTE_NODE_FLAG_MAPPED); - opal_pointer_array_add(jdata->map->nodes, node); - } - /* create a proc object for each one */ - for (k=0; k < rng->nprocs; k++) { - proc = OBJ_NEW(orte_proc_t); - proc->name.jobid = jdata->jobid; - /* leave the vpid undefined as this will be determined - * later when we do the overall ranking */ - proc->app_idx = n; - proc->parent = node->daemon->name.vpid; - OBJ_RETAIN(node); - proc->node = node; - /* flag the proc as ready for launch */ - 
proc->state = ORTE_PROC_STATE_INIT; - opal_pointer_array_add(node->procs, proc); - /* we will add the proc to the jdata array when we - * compute its rank */ - } - node->num_procs += rng->nprocs; - } - ++cnt; - if (rng->cnt <= cnt) { - rng = (orte_nidmap_regex_t*)opal_list_get_next(&rng->super); - if (NULL == rng) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - opal_argv_free(tmp); - rc = ORTE_ERR_NOT_FOUND; - goto complete; - } - cnt = 0; - } - } - OPAL_LIST_DESTRUCT(&trk); - } - opal_argv_free(tmp); - - complete: - /* reset any node map flags we used so the next job will start clean */ - for (n=0; n < jdata->map->nodes->size; n++) { - if (NULL != (node = (orte_node_t*)opal_pointer_array_get_item(jdata->map->nodes, n))) { - ORTE_FLAG_UNSET(node, ORTE_NODE_FLAG_MAPPED); - } - } - - return rc; -} - - -static int regex_parse_node_range(char *base, char *range, int num_digits, char *suffix, char ***names); - -/* - * Parse one or more ranges in a set - * - * @param base The base text of the node name - * @param *ranges A pointer to a range. This can contain multiple ranges - * (i.e. 
"1-3,10" or "5" or "9,0100-0130,250") - * @param ***names An argv array to add the newly discovered nodes to - */ -static int regex_parse_node_ranges(char *base, char *ranges, int num_digits, char *suffix, char ***names) -{ - int i, len, ret; - char *start, *orig; - - /* Look for commas, the separator between ranges */ - - len = strlen(ranges); - for (orig = start = ranges, i = 0; i < len; ++i) { - if (',' == ranges[i]) { - ranges[i] = '\0'; - ret = regex_parse_node_range(base, start, num_digits, suffix, names); - if (ORTE_SUCCESS != ret) { - ORTE_ERROR_LOG(ret); - return ret; - } - start = ranges + i + 1; - } - } - - /* Pick up the last range, if it exists */ - - if (start < orig + len) { - - OPAL_OUTPUT_VERBOSE((1, orte_debug_output, - "%s regex:parse:ranges: parse range %s (2)", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), start)); - - ret = regex_parse_node_range(base, start, num_digits, suffix, names); - if (ORTE_SUCCESS != ret) { - ORTE_ERROR_LOG(ret); - return ret; - } - } - - /* All done */ - return ORTE_SUCCESS; -} - - -/* - * Parse a single range in a set and add the full names of the nodes - * found to the names argv - * - * @param base The base text of the node name - * @param *ranges A pointer to a single range. (i.e. 
"1-3" or "5") - * @param ***names An argv array to add the newly discovered nodes to - */ -static int regex_parse_node_range(char *base, char *range, int num_digits, char *suffix, char ***names) -{ - char *str, tmp[132]; - size_t i, k, start, end; - size_t base_len, len; - bool found; - int ret; - - if (NULL == base || NULL == range) { - return ORTE_ERROR; - } - - len = strlen(range); - base_len = strlen(base); - /* Silence compiler warnings; start and end are always assigned - properly, below */ - start = end = 0; - - /* Look for the beginning of the first number */ - - for (found = false, i = 0; i < len; ++i) { - if (isdigit((int) range[i])) { - if (!found) { - start = atoi(range + i); - found = true; - break; - } - } - } - if (!found) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - return ORTE_ERR_NOT_FOUND; - } - - /* Look for the end of the first number */ - - for (found = false; i < len; ++i) { - if (!isdigit(range[i])) { - break; - } - } - - /* Was there no range, just a single number? */ - - if (i >= len) { - end = start; - found = true; - } else { - /* Nope, there was a range. 
Look for the beginning of the second - * number - */ - for (; i < len; ++i) { - if (isdigit(range[i])) { - end = strtol(range + i, NULL, 10); - found = true; - break; - } - } - } - if (!found) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - return ORTE_ERR_NOT_FOUND; - } - - /* Make strings for all values in the range */ - - len = base_len + num_digits + 32; - if (NULL != suffix) { - len += strlen(suffix); - } - str = (char *) malloc(len); - if (NULL == str) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - for (i = start; i <= end; ++i) { - memset(str, 0, len); - strcpy(str, base); - /* we need to zero-pad the digits */ - for (k=0; k < (size_t)num_digits; k++) { - str[k+base_len] = '0'; - } - memset(tmp, 0, 132); - snprintf(tmp, 132, "%lu", (unsigned long)i); - for (k=0; k < strlen(tmp); k++) { - str[base_len + num_digits - k - 1] = tmp[strlen(tmp)-k-1]; - } - /* if there is a suffix, add it */ - if (NULL != suffix) { - strcat(str, suffix); - } - ret = opal_argv_append_nosize(names, str); - if(ORTE_SUCCESS != ret) { - ORTE_ERROR_LOG(ret); - free(str); - return ret; - } - } - free(str); - - /* All done */ - return ORTE_SUCCESS; -} - -static int regex_parse_node_range(char *base, char *range, int num_digits, char *suffix, char ***names); - -int orte_regx_base_extract_node_names(char *regexp, char ***names) -{ - int i, j, k, len, ret; - char *base; - char *orig, *suffix; - bool found_range = false; - bool more_to_come = false; - int num_digits; - - if (NULL == regexp) { - *names = NULL; - return ORTE_SUCCESS; - } - - orig = base = strdup(regexp); - if (NULL == base) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - OPAL_OUTPUT_VERBOSE((1, orte_debug_output, - "%s regex:extract:nodenames: checking nodelist: %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - regexp)); - - do { - /* Find the base */ - len = strlen(base); - for (i = 0; i <= len; ++i) { - if (base[i] == '[') { - /* we found a range. 
this gets dealt with below */ - base[i] = '\0'; - found_range = true; - break; - } - if (base[i] == ',') { - /* we found a singleton node, and there are more to come */ - base[i] = '\0'; - found_range = false; - more_to_come = true; - break; - } - if (base[i] == '\0') { - /* we found a singleton node */ - found_range = false; - more_to_come = false; - break; - } - } - if (i == 0 && !found_range) { - /* we found a special character at the beginning of the string */ - orte_show_help("help-regex.txt", "regex:special-char", true, regexp); - free(orig); - return ORTE_ERR_BAD_PARAM; - } - - if (found_range) { - /* If we found a range, get the number of digits in the numbers */ - i++; /* step over the [ */ - for (j=i; j < len; j++) { - if (base[j] == ':') { - base[j] = '\0'; - break; - } - } - if (j >= len) { - /* we didn't find the number of digits */ - orte_show_help("help-regex.txt", "regex:num-digits-missing", true, regexp); - free(orig); - return ORTE_ERR_BAD_PARAM; - } - num_digits = strtol(&base[i], NULL, 10); - i = j + 1; /* step over the : */ - /* now find the end of the range */ - for (j = i; j < len; ++j) { - if (base[j] == ']') { - base[j] = '\0'; - break; - } - } - if (j >= len) { - /* we didn't find the end of the range */ - orte_show_help("help-regex.txt", "regex:end-range-missing", true, regexp); - free(orig); - return ORTE_ERR_BAD_PARAM; - } - /* check for a suffix */ - if (j+1 < len && base[j+1] != ',') { - /* find the next comma, if present */ - for (k=j+1; k < len && base[k] != ','; k++); - if (k < len) { - base[k] = '\0'; - } - suffix = strdup(&base[j+1]); - if (k < len) { - base[k] = ','; - } - j = k-1; - } else { - suffix = NULL; - } - OPAL_OUTPUT_VERBOSE((1, orte_debug_output, - "%s regex:extract:nodenames: parsing range %s %s %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - base, base + i, suffix)); - - ret = regex_parse_node_ranges(base, base + i, num_digits, suffix, names); - if (NULL != suffix) { - free(suffix); - } - if (ORTE_SUCCESS != ret) { - 
orte_show_help("help-regex.txt", "regex:bad-value", true, regexp); - free(orig); - return ret; - } - if (j+1 < len && base[j + 1] == ',') { - more_to_come = true; - base = &base[j + 2]; - } else { - more_to_come = false; - } - } else { - /* If we didn't find a range, just add the node */ - if(ORTE_SUCCESS != (ret = opal_argv_append_nosize(names, base))) { - ORTE_ERROR_LOG(ret); - free(orig); - return ret; - } - /* step over the comma */ - i++; - /* set base equal to the (possible) next base to look at */ - base = &base[i]; - } - } while(more_to_come); - - free(orig); - - /* All done */ - return ret; -} diff --git a/orte/mca/regx/base/regx_base_frame.c b/orte/mca/regx/base/regx_base_frame.c deleted file mode 100644 index 61a8f0bf61..0000000000 --- a/orte/mca/regx/base/regx_base_frame.c +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright (c) 2015-2018 Intel, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" -#include "orte/constants.h" - -#include - -#include "orte/mca/mca.h" -#include "opal/util/argv.h" -#include "opal/util/output.h" -#include "opal/mca/base/base.h" - -#include "orte/runtime/orte_globals.h" -#include "orte/util/show_help.h" -#include "orte/mca/errmgr/errmgr.h" - -#include "orte/mca/regx/base/base.h" -/* - * The following file was created by configure. It contains extern - * statements and the definition of an array of pointers to each - * component's public mca_base_component_t struct. 
- */ - -#include "orte/mca/regx/base/static-components.h" - -/* - * Global variables - */ -orte_regx_base_module_t orte_regx = {0}; - -static int orte_regx_base_close(void) -{ - /* give the selected module a chance to finalize */ - if (NULL != orte_regx.finalize) { - orte_regx.finalize(); - } - return mca_base_framework_components_close(&orte_regx_base_framework, NULL); -} - -/** - * Function for finding and opening either all MCA components, or the one - * that was specifically requested via a MCA parameter. - */ -static int orte_regx_base_open(mca_base_open_flag_t flags) -{ - int rc; - - /* Open up all available components */ - rc = mca_base_framework_components_open(&orte_regx_base_framework, flags); - - /* All done */ - return rc; -} - -MCA_BASE_FRAMEWORK_DECLARE(orte, regx, "ORTE Regx Subsystem", NULL, - orte_regx_base_open, orte_regx_base_close, - mca_regx_base_static_components, 0); - -/* OBJECT INSTANTIATIONS */ -static void nrcon(orte_nidmap_regex_t *p) -{ - p->ctx = 0; - p->nprocs = -1; - p->cnt = 0; -} -OBJ_CLASS_INSTANCE(orte_nidmap_regex_t, - opal_list_item_t, - nrcon, NULL); diff --git a/orte/mca/regx/base/regx_base_select.c b/orte/mca/regx/base/regx_base_select.c deleted file mode 100644 index 35cd36dc2a..0000000000 --- a/orte/mca/regx/base/regx_base_select.c +++ /dev/null @@ -1,61 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. 
- * Copyright (c) 2018 Intel, Inc. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - - -#include "orte_config.h" -#include "orte/constants.h" - -#include "orte/mca/mca.h" -#include "opal/mca/base/base.h" - -#include "orte/mca/regx/base/base.h" - - -/** - * Function for selecting one component from all those that are - * available. - */ -int orte_regx_base_select(void) -{ - orte_regx_base_component_t *best_component = NULL; - orte_regx_base_module_t *best_module = NULL; - int rc = ORTE_SUCCESS; - - /* - * Select the best component - */ - if (OPAL_SUCCESS != mca_base_select("regx", orte_regx_base_framework.framework_output, - &orte_regx_base_framework.framework_components, - (mca_base_module_t **) &best_module, - (mca_base_component_t **) &best_component, NULL)) { - /* This will only happen if no component was selected */ - return ORTE_ERR_NOT_FOUND; - } - - /* Save the winner */ - orte_regx = *best_module; - /* give it a chance to init */ - if (NULL != orte_regx.init) { - rc = orte_regx.init(); - } - return rc; -} diff --git a/orte/mca/regx/fwd/Makefile.am b/orte/mca/regx/fwd/Makefile.am deleted file mode 100644 index 44cd769f23..0000000000 --- a/orte/mca/regx/fwd/Makefile.am +++ /dev/null @@ -1,36 +0,0 @@ -# -# Copyright (c) 2016-2018 Intel, Inc. All rights reserved. -# Copyright (c) 2017 IBM Corporation. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -sources = \ - regx_fwd_component.c \ - regx_fwd.h \ - regx_fwd.c - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). 
- -if MCA_BUILD_orte_regx_fwd_DSO -component_noinst = -component_install = mca_regx_fwd.la -else -component_noinst = libmca_regx_fwd.la -component_install = -endif - -mcacomponentdir = $(ortelibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_regx_fwd_la_SOURCES = $(sources) -mca_regx_fwd_la_LDFLAGS = -module -avoid-version -mca_regx_fwd_la_LIBADD = $(top_builddir)/orte/lib@ORTE_LIB_PREFIX@open-rte.la - -noinst_LTLIBRARIES = $(component_noinst) -libmca_regx_fwd_la_SOURCES = $(sources) -libmca_regx_fwd_la_LDFLAGS = -module -avoid-version diff --git a/orte/mca/regx/fwd/owner.txt b/orte/mca/regx/fwd/owner.txt deleted file mode 100644 index 85b4416d20..0000000000 --- a/orte/mca/regx/fwd/owner.txt +++ /dev/null @@ -1,7 +0,0 @@ -# -# owner/status file -# owner: institution that is responsible for this package -# status: e.g. active, maintenance, unmaintained -# -owner: INTEL -status: active diff --git a/orte/mca/regx/fwd/regx_fwd.c b/orte/mca/regx/fwd/regx_fwd.c deleted file mode 100644 index 18405ae297..0000000000 --- a/orte/mca/regx/fwd/regx_fwd.c +++ /dev/null @@ -1,300 +0,0 @@ -/* - * Copyright (c) 2016-2018 Intel, Inc. All rights reserved. - * Copyright (c) 2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - */ - -#include "orte_config.h" -#include "orte/types.h" -#include "opal/types.h" - -#ifdef HAVE_UNISTD_H -#include -#endif -#include - -#include "opal/util/argv.h" -#include "opal/util/basename.h" -#include "opal/util/opal_environ.h" - -#include "orte/runtime/orte_globals.h" -#include "orte/util/name_fns.h" -#include "orte/util/show_help.h" -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/rmaps/base/base.h" -#include "orte/mca/routed/routed.h" -#include "orte/mca/regx/base/base.h" - -#include "regx_fwd.h" - -static int nidmap_create(opal_pointer_array_t *pool, char **regex); - -orte_regx_base_module_t orte_regx_fwd_module = { - .nidmap_create = nidmap_create, - .nidmap_parse = orte_regx_base_nidmap_parse, - .extract_node_names = orte_regx_base_extract_node_names, - .encode_nodemap = orte_regx_base_encode_nodemap, - .decode_daemon_nodemap = orte_regx_base_decode_daemon_nodemap, - .generate_ppn = orte_regx_base_generate_ppn, - .parse_ppn = orte_regx_base_parse_ppn -}; - -static int nidmap_create(opal_pointer_array_t *pool, char **regex) -{ - char *node; - char prefix[ORTE_MAX_NODE_PREFIX]; - int i, j, n, len, startnum, nodenum, numdigits; - bool found; - char *suffix, *sfx, *nodenames; - orte_regex_node_t *ndreg; - orte_regex_range_t *range, *rng; - opal_list_t nodenms, dvpids; - opal_list_item_t *item, *itm2; - char **regexargs = NULL, *tmp, *tmp2; - orte_node_t *nptr; - orte_vpid_t vpid; - - OBJ_CONSTRUCT(&nodenms, opal_list_t); - OBJ_CONSTRUCT(&dvpids, opal_list_t); - - rng = NULL; - for (n=0; n < pool->size; n++) { - if (NULL == (nptr = (orte_node_t*)opal_pointer_array_get_item(pool, n))) { - continue; - } - /* if no daemon has been assigned, then this node is not being used */ - if (NULL == nptr->daemon) { - vpid = -1; // indicates no daemon assigned - } else { - vpid = nptr->daemon->name.vpid; - } - /* deal with the daemon vpid - see if it is next in the - * current range */ 
- if (NULL == rng) { - /* just starting */ - rng = OBJ_NEW(orte_regex_range_t); - rng->vpid = vpid; - rng->cnt = 1; - opal_list_append(&dvpids, &rng->super); - } else if (UINT32_MAX == vpid) { - if (-1 == rng->vpid) { - rng->cnt++; - } else { - /* need to start another range */ - rng = OBJ_NEW(orte_regex_range_t); - rng->vpid = vpid; - rng->cnt = 1; - opal_list_append(&dvpids, &rng->super); - } - } else if (-1 == rng->vpid) { - /* need to start another range */ - rng = OBJ_NEW(orte_regex_range_t); - rng->vpid = vpid; - rng->cnt = 1; - opal_list_append(&dvpids, &rng->super); - } else { - /* is this the next in line */ - if (vpid == (orte_vpid_t)(rng->vpid + rng->cnt)) { - rng->cnt++; - } else { - /* need to start another range */ - rng = OBJ_NEW(orte_regex_range_t); - rng->vpid = vpid; - rng->cnt = 1; - opal_list_append(&dvpids, &rng->super); - } - } - node = nptr->name; - /* determine this node's prefix by looking for first digit char */ - len = strlen(node); - startnum = -1; - memset(prefix, 0, ORTE_MAX_NODE_PREFIX); - for (i=0, j=0; i < len; i++) { - /* valid hostname characters are ascii letters, digits and the '-' character. */ - if (isdigit(node[i])) { - /* count the size of the numeric field - but don't - * add the digits to the prefix - */ - if (startnum < 0) { - /* okay, this defines end of the prefix */ - startnum = i; - } - continue; - } - /* this must be either an alpha, a '.', or '-' */ - if (!isalpha(node[i]) && '-' != node[i] && '.' 
!= node[i]) { - orte_show_help("help-regex.txt", "regex:invalid-name", true, node); - return ORTE_ERR_SILENT; - } - if (startnum < 0) { - prefix[j++] = node[i]; - } - } - if (startnum < 0) { - /* can't compress this name - just add it to the list */ - ndreg = OBJ_NEW(orte_regex_node_t); - ndreg->prefix = strdup(node); - opal_list_append(&nodenms, &ndreg->super); - continue; - } - /* convert the digits and get any suffix */ - nodenum = strtol(&node[startnum], &sfx, 10); - if (NULL != sfx) { - suffix = strdup(sfx); - numdigits = (int)(sfx - &node[startnum]); - } else { - suffix = NULL; - numdigits = (int)strlen(&node[startnum]); - } - /* is this node name already on our list? */ - found = false; - if (0 != opal_list_get_size(&nodenms)) { - ndreg = (orte_regex_node_t*)opal_list_get_last(&nodenms); - - if ((0 < strlen(prefix) && NULL == ndreg->prefix) || - (0 == strlen(prefix) && NULL != ndreg->prefix) || - (0 < strlen(prefix) && NULL != ndreg->prefix && - 0 != strcmp(prefix, ndreg->prefix)) || - (NULL == suffix && NULL != ndreg->suffix) || - (NULL != suffix && NULL == ndreg->suffix) || - (NULL != suffix && NULL != ndreg->suffix && - 0 != strcmp(suffix, ndreg->suffix)) || - (numdigits != ndreg->num_digits)) { - found = false; - } else { - /* found a match - flag it */ - found = true; - } - } - if (found) { - range = (orte_regex_range_t*)opal_list_get_last(&ndreg->ranges); - if (NULL == range) { - /* first range for this nodeid */ - range = OBJ_NEW(orte_regex_range_t); - range->vpid = nodenum; - range->cnt = 1; - opal_list_append(&ndreg->ranges, &range->super); - /* see if the node number is out of sequence */ - } else if (nodenum != (range->vpid + range->cnt)) { - /* start a new range */ - range = OBJ_NEW(orte_regex_range_t); - range->vpid = nodenum; - range->cnt = 1; - opal_list_append(&ndreg->ranges, &range->super); - } else { - /* everything matches - just increment the cnt */ - range->cnt++; - } - } else { - /* need to add it */ - ndreg = 
OBJ_NEW(orte_regex_node_t); - if (0 < strlen(prefix)) { - ndreg->prefix = strdup(prefix); - } - if (NULL != suffix) { - ndreg->suffix = strdup(suffix); - } - ndreg->num_digits = numdigits; - opal_list_append(&nodenms, &ndreg->super); - /* record the first range for this nodeid - we took - * care of names we can't compress above - */ - range = OBJ_NEW(orte_regex_range_t); - range->vpid = nodenum; - range->cnt = 1; - opal_list_append(&ndreg->ranges, &range->super); - } - if (NULL != suffix) { - free(suffix); - } - } - /* begin constructing the regular expression */ - while (NULL != (item = opal_list_remove_first(&nodenms))) { - ndreg = (orte_regex_node_t*)item; - - /* if no ranges, then just add the name */ - if (0 == opal_list_get_size(&ndreg->ranges)) { - if (NULL != ndreg->prefix) { - /* solitary node */ - opal_asprintf(&tmp, "%s", ndreg->prefix); - opal_argv_append_nosize(®exargs, tmp); - free(tmp); - } - OBJ_RELEASE(ndreg); - continue; - } - /* start the regex for this nodeid with the prefix */ - if (NULL != ndreg->prefix) { - opal_asprintf(&tmp, "%s[%d:", ndreg->prefix, ndreg->num_digits); - } else { - opal_asprintf(&tmp, "[%d:", ndreg->num_digits); - } - /* add the ranges */ - while (NULL != (itm2 = opal_list_remove_first(&ndreg->ranges))) { - range = (orte_regex_range_t*)itm2; - if (1 == range->cnt) { - opal_asprintf(&tmp2, "%s%u,", tmp, range->vpid); - } else { - opal_asprintf(&tmp2, "%s%u-%u,", tmp, range->vpid, range->vpid + range->cnt - 1); - } - free(tmp); - tmp = tmp2; - OBJ_RELEASE(range); - } - /* replace the final comma */ - tmp[strlen(tmp)-1] = ']'; - if (NULL != ndreg->suffix) { - /* add in the suffix, if provided */ - opal_asprintf(&tmp2, "%s%s", tmp, ndreg->suffix); - free(tmp); - tmp = tmp2; - } - opal_argv_append_nosize(®exargs, tmp); - free(tmp); - OBJ_RELEASE(ndreg); - } - - /* assemble final result */ - nodenames = opal_argv_join(regexargs, ','); - /* cleanup */ - opal_argv_free(regexargs); - OBJ_DESTRUCT(&nodenms); - - /* do the same for 
the vpids */ - tmp = NULL; - while (NULL != (item = opal_list_remove_first(&dvpids))) { - rng = (orte_regex_range_t*)item; - if (1 < rng->cnt) { - if (NULL == tmp) { - opal_asprintf(&tmp, "%u(%u)", rng->vpid, rng->cnt); - } else { - opal_asprintf(&tmp2, "%s,%u(%u)", tmp, rng->vpid, rng->cnt); - free(tmp); - tmp = tmp2; - } - } else { - if (NULL == tmp) { - opal_asprintf(&tmp, "%u", rng->vpid); - } else { - opal_asprintf(&tmp2, "%s,%u", tmp, rng->vpid); - free(tmp); - tmp = tmp2; - } - } - OBJ_RELEASE(rng); - } - OPAL_LIST_DESTRUCT(&dvpids); - - /* now concatenate the results into one string */ - opal_asprintf(&tmp2, "%s@%s", nodenames, tmp); - free(nodenames); - free(tmp); - *regex = tmp2; - return ORTE_SUCCESS; -} diff --git a/orte/mca/regx/fwd/regx_fwd.h b/orte/mca/regx/fwd/regx_fwd.h deleted file mode 100644 index c6f4a966d1..0000000000 --- a/orte/mca/regx/fwd/regx_fwd.h +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright (c) 2016-2018 Intel, Inc. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef _MCA_REGX_FwD_H_ -#define _MCA_REGX_FwD_H_ - -#include "orte_config.h" - -#include "orte/types.h" - -#include "opal/mca/base/base.h" -#include "orte/mca/regx/regx.h" - - -BEGIN_C_DECLS - -ORTE_MODULE_DECLSPEC extern orte_regx_base_component_t mca_regx_fwd_component; -extern orte_regx_base_module_t orte_regx_fwd_module; - -END_C_DECLS - -#endif /* MCA_REGX_FwD_H_ */ diff --git a/orte/mca/regx/fwd/regx_fwd_component.c b/orte/mca/regx/fwd/regx_fwd_component.c deleted file mode 100644 index 822cf622dd..0000000000 --- a/orte/mca/regx/fwd/regx_fwd_component.c +++ /dev/null @@ -1,44 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2016-2018 Intel, Inc. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" -#include "orte/types.h" -#include "opal/types.h" - -#include "opal/util/show_help.h" - -#include "orte/mca/regx/regx.h" -#include "regx_fwd.h" - -static int component_query(mca_base_module_t **module, int *priority); - -/* - * Struct of function pointers and all that to let us be initialized - */ -orte_regx_base_component_t mca_regx_fwd_component = { - .base_version = { - MCA_REGX_BASE_VERSION_1_0_0, - .mca_component_name = "fwd", - MCA_BASE_MAKE_VERSION(component, ORTE_MAJOR_VERSION, ORTE_MINOR_VERSION, - ORTE_RELEASE_VERSION), - .mca_query_component = component_query, - }, - .base_data = { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, -}; - -static int component_query(mca_base_module_t **module, int *priority) -{ - *module = (mca_base_module_t*)&orte_regx_fwd_module; - *priority = 10; - return ORTE_SUCCESS; -} diff --git a/orte/mca/regx/regx.h b/orte/mca/regx/regx.h deleted file mode 100644 index 2d3630e0b6..0000000000 --- a/orte/mca/regx/regx.h +++ /dev/null @@ -1,127 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2015-2018 Intel, Inc. All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - * The Open RTE Personality Framework (regx) - * - * Multi-select framework so that multiple personalities can be - * simultaneously supported - * - */ - -#ifndef ORTE_MCA_REGX_H -#define ORTE_MCA_REGX_H - -#include "orte_config.h" -#include "orte/types.h" - -#include "opal/class/opal_pointer_array.h" -#include "opal/dss/dss_types.h" -#include "orte/mca/mca.h" - -#include "orte/runtime/orte_globals.h" - - -BEGIN_C_DECLS - -/* - * regx module functions - */ - -#define ORTE_MAX_NODE_PREFIX 50 -#define ORTE_CONTIG_NODE_CMD 0x01 -#define ORTE_NON_CONTIG_NODE_CMD 0x02 - -/** -* REGX module functions - the modules are accessed via -* the base stub functions -*/ -typedef struct { - opal_list_item_t super; - int ctx; - int nprocs; - int cnt; -} orte_nidmap_regex_t; -ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_nidmap_regex_t); - -/* initialize the module - allow it to do whatever one-time - * things it requires */ -typedef int (*orte_regx_base_module_init_fn_t)(void); - -typedef int (*orte_regx_base_module_nidmap_create_fn_t)(opal_pointer_array_t *pool, char **regex); -typedef int (*orte_regx_base_module_nidmap_parse_fn_t)(char *regex); -typedef int (*orte_regx_base_module_extract_node_names_fn_t)(char *regexp, char ***names); - -/* create a regular expression describing the nodes in the - * allocation */ -typedef int (*orte_regx_base_module_encode_nodemap_fn_t)(opal_buffer_t *buffer); - -/* decode a regular expression created by the encode function - * into the orte_node_pool array */ -typedef int (*orte_regx_base_module_decode_daemon_nodemap_fn_t)(opal_buffer_t *buffer); - -typedef int (*orte_regx_base_module_build_daemon_nidmap_fn_t)(void); - -/* create a regular expression describing the ppn for a job */ -typedef int (*orte_regx_base_module_generate_ppn_fn_t)(orte_job_t *jdata, char **ppn); - -/* decode the ppn */ -typedef int (*orte_regx_base_module_parse_ppn_fn_t)(orte_job_t 
*jdata, char *ppn); - - -/* give the component a chance to cleanup */ -typedef void (*orte_regx_base_module_finalize_fn_t)(void); - -/* - * regx module version 1.0.0 - */ -typedef struct { - orte_regx_base_module_init_fn_t init; - orte_regx_base_module_nidmap_create_fn_t nidmap_create; - orte_regx_base_module_nidmap_parse_fn_t nidmap_parse; - orte_regx_base_module_extract_node_names_fn_t extract_node_names; - orte_regx_base_module_encode_nodemap_fn_t encode_nodemap; - orte_regx_base_module_decode_daemon_nodemap_fn_t decode_daemon_nodemap; - orte_regx_base_module_build_daemon_nidmap_fn_t build_daemon_nidmap; - orte_regx_base_module_generate_ppn_fn_t generate_ppn; - orte_regx_base_module_parse_ppn_fn_t parse_ppn; - orte_regx_base_module_finalize_fn_t finalize; -} orte_regx_base_module_t; - -ORTE_DECLSPEC extern orte_regx_base_module_t orte_regx; - -/* - * regx component - */ - -/** - * regx component version 1.0.0 - */ -typedef struct { - /** Base MCA structure */ - mca_base_component_t base_version; - /** Base MCA data */ - mca_base_component_data_t base_data; -} orte_regx_base_component_t; - -/** - * Macro for use in components that are of type regx - */ -#define MCA_REGX_BASE_VERSION_1_0_0 \ - ORTE_MCA_BASE_VERSION_2_1_0("regx", 1, 0, 0) - - -END_C_DECLS - -#endif diff --git a/orte/mca/regx/reverse/Makefile.am b/orte/mca/regx/reverse/Makefile.am deleted file mode 100644 index d527254451..0000000000 --- a/orte/mca/regx/reverse/Makefile.am +++ /dev/null @@ -1,36 +0,0 @@ -# -# Copyright (c) 2016-2018 Intel, Inc. All rights reserved. -# Copyright (c) 2017 IBM Corporation. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -sources = \ - regx_reverse_component.c \ - regx_reverse.h \ - regx_reverse.c - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). 
- -if MCA_BUILD_orte_regx_reverse_DSO -component_noinst = -component_install = mca_regx_reverse.la -else -component_noinst = libmca_regx_reverse.la -component_install = -endif - -mcacomponentdir = $(ortelibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_regx_reverse_la_SOURCES = $(sources) -mca_regx_reverse_la_LDFLAGS = -module -avoid-version -mca_regx_reverse_la_LIBADD = $(top_builddir)/orte/lib@ORTE_LIB_PREFIX@open-rte.la - -noinst_LTLIBRARIES = $(component_noinst) -libmca_regx_reverse_la_SOURCES = $(sources) -libmca_regx_reverse_la_LDFLAGS = -module -avoid-version diff --git a/orte/mca/regx/reverse/owner.txt b/orte/mca/regx/reverse/owner.txt deleted file mode 100644 index 2fd247dddb..0000000000 --- a/orte/mca/regx/reverse/owner.txt +++ /dev/null @@ -1,7 +0,0 @@ -# -# owner/status file -# owner: institution that is responsible for this package -# status: e.g. active, maintenance, unmaintained -# -owner: IBM -status: active diff --git a/orte/mca/regx/reverse/regx_reverse.c b/orte/mca/regx/reverse/regx_reverse.c deleted file mode 100644 index e1a8dd1631..0000000000 --- a/orte/mca/regx/reverse/regx_reverse.c +++ /dev/null @@ -1,319 +0,0 @@ -/* - * Copyright (c) 2016-2018 Intel, Inc. All rights reserved. - * Copyright (c) 2018 IBM Corporation. All rights reserved. - * Copyright (c) 2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - */ - -#include "orte_config.h" -#include "orte/types.h" -#include "opal/types.h" - -#ifdef HAVE_UNISTD_H -#include -#endif -#include - -#include "opal/util/argv.h" -#include "opal/util/basename.h" -#include "opal/util/opal_environ.h" - -#include "orte/runtime/orte_globals.h" -#include "orte/util/name_fns.h" -#include "orte/util/show_help.h" -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/rmaps/base/base.h" -#include "orte/mca/routed/routed.h" -#include "orte/mca/regx/base/base.h" - -#include "regx_reverse.h" - -static int nidmap_create(opal_pointer_array_t *pool, char **regex); - -orte_regx_base_module_t orte_regx_reverse_module = { - .nidmap_create = nidmap_create, - .nidmap_parse = orte_regx_base_nidmap_parse, - .extract_node_names = orte_regx_base_extract_node_names, - .encode_nodemap = orte_regx_base_encode_nodemap, - .decode_daemon_nodemap = orte_regx_base_decode_daemon_nodemap, - .generate_ppn = orte_regx_base_generate_ppn, - .parse_ppn = orte_regx_base_parse_ppn -}; - -static int nidmap_create(opal_pointer_array_t *pool, char **regex) -{ - char *node; - char prefix[ORTE_MAX_NODE_PREFIX]; - int i, j, n, len, startnum, nodenum, numdigits; - bool found; - char *suffix, *sfx, *nodenames; - orte_regex_node_t *ndreg; - orte_regex_range_t *range, *rng; - opal_list_t nodenms, dvpids; - opal_list_item_t *item, *itm2; - char **regexargs = NULL, *tmp, *tmp2; - orte_node_t *nptr; - orte_vpid_t vpid; - - OBJ_CONSTRUCT(&nodenms, opal_list_t); - OBJ_CONSTRUCT(&dvpids, opal_list_t); - - rng = NULL; - for (n=0; n < pool->size; n++) { - if (NULL == (nptr = (orte_node_t*)opal_pointer_array_get_item(pool, n))) { - continue; - } - /* if no daemon has been assigned, then this node is not being used */ - if (NULL == nptr->daemon) { - vpid = -1; // indicates no daemon assigned - } else { - vpid = nptr->daemon->name.vpid; - } - /* deal with the daemon vpid - see if it is next in the - * current 
range */ - if (NULL == rng) { - /* just starting */ - rng = OBJ_NEW(orte_regex_range_t); - rng->vpid = vpid; - rng->cnt = 1; - opal_list_append(&dvpids, &rng->super); - } else if (UINT32_MAX == vpid) { - if (-1 == rng->vpid) { - rng->cnt++; - } else { - /* need to start another range */ - rng = OBJ_NEW(orte_regex_range_t); - rng->vpid = vpid; - rng->cnt = 1; - opal_list_append(&dvpids, &rng->super); - } - } else if (-1 == rng->vpid) { - /* need to start another range */ - rng = OBJ_NEW(orte_regex_range_t); - rng->vpid = vpid; - rng->cnt = 1; - opal_list_append(&dvpids, &rng->super); - } else { - /* is this the next in line */ - if (vpid == (orte_vpid_t)(rng->vpid + rng->cnt)) { - rng->cnt++; - } else { - /* need to start another range */ - rng = OBJ_NEW(orte_regex_range_t); - rng->vpid = vpid; - rng->cnt = 1; - opal_list_append(&dvpids, &rng->super); - } - } - node = nptr->name; - opal_output_verbose(5, orte_regx_base_framework.framework_output, - "%s PROCESS NODE <%s>", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - node); - /* determine this node's prefix by looking for first digit char */ - len = strlen(node); - startnum = -1; - memset(prefix, 0, ORTE_MAX_NODE_PREFIX); - numdigits = 0; - - /* Valid hostname characters are: - * - ascii letters, digits, and the '-' character. - * Determine the prefix in reverse to better support hostnames like: - * c712f6n01, c699c086 where there are sets of digits, and the lowest - * set changes most frequently. - */ - startnum = -1; - memset(prefix, 0, ORTE_MAX_NODE_PREFIX); - numdigits = 0; - for (i=len-1; i >= 0; i--) { - // Count all of the digits - if( isdigit(node[i]) ) { - numdigits++; - continue; - } - else { - // At this point everything at and above position 'i' is prefix. 
- for( j = 0; j <= i; ++j) { - prefix[j] = node[j]; - } - if (numdigits) { - startnum = j; - } - break; - } - } - - opal_output_verbose(5, orte_regx_base_framework.framework_output, - "%s PROCESS NODE <%s> : reverse / prefix \"%s\" / numdigits %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - node, prefix, numdigits); - - if (startnum < 0) { - /* can't compress this name - just add it to the list */ - ndreg = OBJ_NEW(orte_regex_node_t); - ndreg->prefix = strdup(node); - opal_list_append(&nodenms, &ndreg->super); - continue; - } - /* convert the digits and get any suffix */ - nodenum = strtol(&node[startnum], &sfx, 10); - if (NULL != sfx) { - suffix = strdup(sfx); - } else { - suffix = NULL; - } - /* is this node name already on our list? */ - found = false; - if (0 != opal_list_get_size(&nodenms)) { - ndreg = (orte_regex_node_t*)opal_list_get_last(&nodenms); - - if ((0 < strlen(prefix) && NULL == ndreg->prefix) || - (0 == strlen(prefix) && NULL != ndreg->prefix) || - (0 < strlen(prefix) && NULL != ndreg->prefix && - 0 != strcmp(prefix, ndreg->prefix)) || - (NULL == suffix && NULL != ndreg->suffix) || - (NULL != suffix && NULL == ndreg->suffix) || - (NULL != suffix && NULL != ndreg->suffix && - 0 != strcmp(suffix, ndreg->suffix)) || - (numdigits != ndreg->num_digits)) { - found = false; - } else { - /* found a match - flag it */ - found = true; - } - } - if (found) { - /* get the last range on this nodeid - we do this - * to preserve order - */ - range = (orte_regex_range_t*)opal_list_get_last(&ndreg->ranges); - if (NULL == range) { - /* first range for this nodeid */ - range = OBJ_NEW(orte_regex_range_t); - range->vpid = nodenum; - range->cnt = 1; - opal_list_append(&ndreg->ranges, &range->super); - /* see if the node number is out of sequence */ - } else if (nodenum != (range->vpid + range->cnt)) { - /* start a new range */ - range = OBJ_NEW(orte_regex_range_t); - range->vpid = nodenum; - range->cnt = 1; - opal_list_append(&ndreg->ranges, &range->super); - } else { - 
/* everything matches - just increment the cnt */ - range->cnt++; - } - } else { - /* need to add it */ - ndreg = OBJ_NEW(orte_regex_node_t); - if (0 < strlen(prefix)) { - ndreg->prefix = strdup(prefix); - } - if (NULL != suffix) { - ndreg->suffix = strdup(suffix); - } - ndreg->num_digits = numdigits; - opal_list_append(&nodenms, &ndreg->super); - /* record the first range for this nodeid - we took - * care of names we can't compress above - */ - range = OBJ_NEW(orte_regex_range_t); - range->vpid = nodenum; - range->cnt = 1; - opal_list_append(&ndreg->ranges, &range->super); - } - if (NULL != suffix) { - free(suffix); - } - } - /* begin constructing the regular expression */ - while (NULL != (item = opal_list_remove_first(&nodenms))) { - ndreg = (orte_regex_node_t*)item; - - /* if no ranges, then just add the name */ - if (0 == opal_list_get_size(&ndreg->ranges)) { - if (NULL != ndreg->prefix) { - /* solitary node */ - opal_asprintf(&tmp, "%s", ndreg->prefix); - opal_argv_append_nosize(®exargs, tmp); - free(tmp); - } - OBJ_RELEASE(ndreg); - continue; - } - /* start the regex for this nodeid with the prefix */ - if (NULL != ndreg->prefix) { - opal_asprintf(&tmp, "%s[%d:", ndreg->prefix, ndreg->num_digits); - } else { - opal_asprintf(&tmp, "[%d:", ndreg->num_digits); - } - /* add the ranges */ - while (NULL != (itm2 = opal_list_remove_first(&ndreg->ranges))) { - range = (orte_regex_range_t*)itm2; - if (1 == range->cnt) { - opal_asprintf(&tmp2, "%s%u,", tmp, range->vpid); - } else { - opal_asprintf(&tmp2, "%s%u-%u,", tmp, range->vpid, range->vpid + range->cnt - 1); - } - free(tmp); - tmp = tmp2; - OBJ_RELEASE(range); - } - /* replace the final comma */ - tmp[strlen(tmp)-1] = ']'; - if (NULL != ndreg->suffix) { - /* add in the suffix, if provided */ - opal_asprintf(&tmp2, "%s%s", tmp, ndreg->suffix); - free(tmp); - tmp = tmp2; - } - opal_argv_append_nosize(®exargs, tmp); - free(tmp); - OBJ_RELEASE(ndreg); - } - - /* assemble final result */ - nodenames = 
opal_argv_join(regexargs, ','); - /* cleanup */ - opal_argv_free(regexargs); - OBJ_DESTRUCT(&nodenms); - - /* do the same for the vpids */ - tmp = NULL; - while (NULL != (item = opal_list_remove_first(&dvpids))) { - rng = (orte_regex_range_t*)item; - if (1 < rng->cnt) { - if (NULL == tmp) { - opal_asprintf(&tmp, "%u(%u)", rng->vpid, rng->cnt); - } else { - opal_asprintf(&tmp2, "%s,%u(%u)", tmp, rng->vpid, rng->cnt); - free(tmp); - tmp = tmp2; - } - } else { - if (NULL == tmp) { - opal_asprintf(&tmp, "%u", rng->vpid); - } else { - opal_asprintf(&tmp2, "%s,%u", tmp, rng->vpid); - free(tmp); - tmp = tmp2; - } - } - OBJ_RELEASE(rng); - } - OPAL_LIST_DESTRUCT(&dvpids); - - /* now concatenate the results into one string */ - opal_asprintf(&tmp2, "%s@%s", nodenames, tmp); - free(nodenames); - free(tmp); - *regex = tmp2; - return ORTE_SUCCESS; -} diff --git a/orte/mca/regx/reverse/regx_reverse.h b/orte/mca/regx/reverse/regx_reverse.h deleted file mode 100644 index e16954d162..0000000000 --- a/orte/mca/regx/reverse/regx_reverse.h +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright (c) 2016-2018 Intel, Inc. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef _MCA_REGX_REVERSE_H_ -#define _MCA_REGX_REVERSE_H_ - -#include "orte_config.h" - -#include "orte/types.h" - -#include "opal/mca/base/base.h" -#include "orte/mca/regx/regx.h" - - -BEGIN_C_DECLS - -ORTE_MODULE_DECLSPEC extern orte_regx_base_component_t mca_regx_reverse_component; -extern orte_regx_base_module_t orte_regx_reverse_module; - -END_C_DECLS - -#endif /* MCA_REGX_ORTE_H_ */ diff --git a/orte/mca/regx/reverse/regx_reverse_component.c b/orte/mca/regx/reverse/regx_reverse_component.c deleted file mode 100644 index a4921b91de..0000000000 --- a/orte/mca/regx/reverse/regx_reverse_component.c +++ /dev/null @@ -1,44 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2016-2018 Intel, Inc. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" -#include "orte/types.h" -#include "opal/types.h" - -#include "opal/util/show_help.h" - -#include "orte/mca/regx/regx.h" -#include "regx_reverse.h" - -static int component_query(mca_base_module_t **module, int *priority); - -/* - * Struct of function pointers and all that to let us be initialized - */ -orte_regx_base_component_t mca_regx_reverse_component = { - .base_version = { - MCA_REGX_BASE_VERSION_1_0_0, - .mca_component_name = "reverse", - MCA_BASE_MAKE_VERSION(component, ORTE_MAJOR_VERSION, ORTE_MINOR_VERSION, - ORTE_RELEASE_VERSION), - .mca_query_component = component_query, - }, - .base_data = { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, -}; - -static int component_query(mca_base_module_t **module, int *priority) -{ - *module = (mca_base_module_t*)&orte_regx_reverse_module; - *priority = 1; - return ORTE_SUCCESS; -} diff --git a/orte/mca/rml/base/rml_base_msg_handlers.c b/orte/mca/rml/base/rml_base_msg_handlers.c index 72a37cdae9..facc1e41c5 100644 --- a/orte/mca/rml/base/rml_base_msg_handlers.c +++ b/orte/mca/rml/base/rml_base_msg_handlers.c @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2007-2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2019 Intel, Inc. All rights reserved. * Copyright (c) 2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -44,6 +44,7 @@ #include "orte/runtime/orte_globals.h" #include "orte/runtime/orte_wait.h" #include "orte/util/name_fns.h" +#include "orte/util/nidmap.h" #include "orte/util/threads.h" #include "orte/mca/rml/rml.h" @@ -181,9 +182,8 @@ void orte_rml_base_process_msg(int fd, short flags, void *cbdata) ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); return; } - assert (NULL != orte_node_regex); - if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &orte_node_regex, 1, OPAL_STRING))) { + if (ORTE_SUCCESS != (rc = orte_util_nidmap_create(orte_node_pool, buffer))) { ORTE_ERROR_LOG(rc); OBJ_RELEASE(buffer); return; diff --git a/orte/orted/orted_comm.c b/orte/orted/orted_comm.c index 3f52cfee22..6cd1d5b15a 100644 --- a/orte/orted/orted_comm.c +++ b/orte/orted/orted_comm.c @@ -14,7 +14,7 @@ * reserved. * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2010-2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * Copyright (c) 2016-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -54,11 +54,11 @@ #include "opal/runtime/opal.h" #include "opal/runtime/opal_progress.h" #include "opal/dss/dss.h" +#include "opal/mca/compress/compress.h" #include "orte/util/proc_info.h" #include "orte/util/session_dir.h" #include "orte/util/name_fns.h" -#include "orte/util/compress.h" #include "orte/mca/errmgr/errmgr.h" #include "orte/mca/grpcomm/base/base.h" @@ -639,7 +639,7 @@ void orte_daemon_recv(int status, orte_process_name_t* sender, free(coprocessors); } answer = OBJ_NEW(opal_buffer_t); - if (orte_util_compress_block((uint8_t*)data.base_ptr, data.bytes_used, + if (opal_compress.compress_block((uint8_t*)data.base_ptr, data.bytes_used, &cmpdata, &cmplen)) { /* the data was compressed - mark that we compressed it */ flag = 1; diff --git a/orte/orted/orted_main.c b/orte/orted/orted_main.c index b8f995ef52..197828aa5d 100644 --- a/orte/orted/orted_main.c +++ b/orte/orted/orted_main.c @@ -16,7 +16,7 @@ * Copyright (c) 2009 Institut National de Recherche en Informatique * et Automatique. All rights reserved. * Copyright (c) 2010 Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -67,22 +67,22 @@ #include "opal/dss/dss.h" #include "opal/mca/hwloc/hwloc-internal.h" #include "opal/mca/pmix/pmix.h" +#include "opal/mca/compress/compress.h" #include "orte/util/show_help.h" #include "orte/util/proc_info.h" #include "orte/util/session_dir.h" #include "orte/util/name_fns.h" +#include "orte/util/nidmap.h" #include "orte/util/parse_options.h" #include "orte/mca/rml/base/rml_contact.h" #include "orte/util/pre_condition_transports.h" -#include "orte/util/compress.h" #include "orte/util/threads.h" #include "orte/mca/errmgr/errmgr.h" #include "orte/mca/ess/ess.h" #include "orte/mca/grpcomm/grpcomm.h" #include "orte/mca/grpcomm/base/base.h" -#include "orte/mca/regx/regx.h" #include "orte/mca/rml/rml.h" #include "orte/mca/rml/rml_types.h" #include "orte/mca/odls/odls.h" @@ -221,10 +221,6 @@ opal_cmd_line_init_t orte_cmd_line_opts[] = { NULL, OPAL_CMD_LINE_TYPE_BOOL, "Whether to report process bindings to stderr" }, - { "orte_node_regex", '\0', "nodes", "nodes", 1, - NULL, OPAL_CMD_LINE_TYPE_STRING, - "Regular expression defining nodes in system" }, - /* End of list */ { NULL, '\0', NULL, NULL, 0, NULL, OPAL_CMD_LINE_TYPE_NULL, NULL } @@ -747,7 +743,7 @@ int orte_daemon(int argc, char *argv[]) /* define the target jobid */ target.jobid = ORTE_PROC_MY_NAME->jobid; - if (orte_fwd_mpirun_port || orte_static_ports || NULL != orte_parent_uri) { + if (NULL != orte_parent_uri) { /* we start by sending to ourselves */ target.vpid = ORTE_PROC_MY_NAME->vpid; /* since we will be waiting for any children to send us @@ -755,11 +751,9 @@ int orte_daemon(int argc, char *argv[]) * a little time in the launch phase by "warming up" the * connection to our parent while we wait for our children */ buffer = OBJ_NEW(opal_buffer_t); // zero-byte message - if (NULL == orte_node_regex) { - orte_rml.recv_buffer_nb(ORTE_PROC_MY_PARENT, ORTE_RML_TAG_NODE_REGEX_REPORT, - ORTE_RML_PERSISTENT, node_regex_report, &node_regex_waiting); - node_regex_waiting = true; - } + 
node_regex_waiting = true; + orte_rml.recv_buffer_nb(ORTE_PROC_MY_PARENT, ORTE_RML_TAG_NODE_REGEX_REPORT, + ORTE_RML_PERSISTENT, node_regex_report, &node_regex_waiting); if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit, ORTE_PROC_MY_PARENT, buffer, ORTE_RML_TAG_WARMUP_CONNECTION, @@ -917,7 +911,7 @@ int orte_daemon(int argc, char *argv[]) if (ORTE_SUCCESS != (ret = opal_dss.pack(&data, &opal_hwloc_topology, 1, OPAL_HWLOC_TOPO))) { ORTE_ERROR_LOG(ret); } - if (orte_util_compress_block((uint8_t*)data.base_ptr, data.bytes_used, + if (opal_compress.compress_block((uint8_t*)data.base_ptr, data.bytes_used, &cmpdata, &cmplen)) { /* the data was compressed - mark that we compressed it */ flag = 1; @@ -1020,10 +1014,6 @@ int orte_daemon(int argc, char *argv[]) i += 2; } } - if (NULL != orte_node_regex) { - /* now launch any child daemons of ours */ - orte_plm.remote_spawn(); - } } if (orte_debug_daemons_flag) { @@ -1174,20 +1164,11 @@ static void report_orted() { static void node_regex_report(int status, orte_process_name_t* sender, opal_buffer_t *buffer, orte_rml_tag_t tag, void *cbdata) { - int rc, n=1; - char * regex; - assert(NULL == orte_node_regex); + int rc; bool * active = (bool *)cbdata; - /* extract the node regex if needed, and update the routing tree */ - n = 1; - if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, ®ex, &n, OPAL_STRING))) { - ORTE_ERROR_LOG(rc); - return; - } - orte_node_regex = regex; - - if (ORTE_SUCCESS != (rc = orte_regx.nidmap_parse(orte_node_regex))) { + /* extract the node info if needed, and update the routing tree */ + if (ORTE_SUCCESS != (rc = orte_util_decode_nidmap(buffer))) { ORTE_ERROR_LOG(rc); return; } diff --git a/orte/runtime/orte_globals.c b/orte/runtime/orte_globals.c index 1f7fc2ec7f..2413ee7e90 100644 --- a/orte/runtime/orte_globals.c +++ b/orte/runtime/orte_globals.c @@ -13,7 +13,7 @@ * Copyright (c) 2009-2010 Oracle and/or its affiliates. All rights reserved. 
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2014-2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. @@ -81,9 +81,7 @@ char *orte_data_server_uri = NULL; /* ORTE OOB port flags */ bool orte_static_ports = false; -char *orte_oob_static_ports = NULL; bool orte_standalone_operation = false; -bool orte_fwd_mpirun_port = true; bool orte_keep_fqdn_hostnames = false; bool orte_have_fqdn_allocation = false; @@ -159,7 +157,6 @@ char *orte_default_hostfile = NULL; bool orte_default_hostfile_given = false; char *orte_rankfile = NULL; int orte_num_allocated_nodes = 0; -char *orte_node_regex = NULL; char *orte_default_dash_host = NULL; /* tool communication controls */ diff --git a/orte/runtime/orte_globals.h b/orte/runtime/orte_globals.h index 1ae4de3eee..68c03842b1 100644 --- a/orte/runtime/orte_globals.h +++ b/orte/runtime/orte_globals.h @@ -13,7 +13,7 @@ * Copyright (c) 2007-2017 Cisco Systems, Inc. All rights reserved * Copyright (c) 2011-2013 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. * Copyright (c) 2017-2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
@@ -465,9 +465,7 @@ ORTE_DECLSPEC extern char *orte_data_server_uri; /* ORTE OOB port flags */ ORTE_DECLSPEC extern bool orte_static_ports; -ORTE_DECLSPEC extern char *orte_oob_static_ports; ORTE_DECLSPEC extern bool orte_standalone_operation; -ORTE_DECLSPEC extern bool orte_fwd_mpirun_port; /* nodename flags */ ORTE_DECLSPEC extern bool orte_keep_fqdn_hostnames; @@ -543,7 +541,6 @@ ORTE_DECLSPEC extern char *orte_default_hostfile; ORTE_DECLSPEC extern bool orte_default_hostfile_given; ORTE_DECLSPEC extern char *orte_rankfile; ORTE_DECLSPEC extern int orte_num_allocated_nodes; -ORTE_DECLSPEC extern char *orte_node_regex; ORTE_DECLSPEC extern char *orte_default_dash_host; /* PMI version control */ diff --git a/orte/runtime/orte_mca_params.c b/orte/runtime/orte_mca_params.c index ba29ef2fa7..0e4c10acd1 100644 --- a/orte/runtime/orte_mca_params.c +++ b/orte/runtime/orte_mca_params.c @@ -13,7 +13,7 @@ * Copyright (c) 2009-2010 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, LLC. * All rights reserved - * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2014-2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. 
@@ -407,14 +407,6 @@ int orte_register_params(void) orte_default_dash_host = NULL; } - /* regex of nodes in system */ - orte_node_regex = NULL; - (void) mca_base_var_register ("orte", "orte", NULL, "node_regex", - "Regular expression defining nodes in the system", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, - OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY, - &orte_node_regex); - /* whether or not to keep FQDN hostnames */ orte_keep_fqdn_hostnames = false; (void) mca_base_var_register ("orte", "orte", NULL, "keep_fqdn_hostnames", @@ -776,13 +768,6 @@ int orte_register_params(void) OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY, &orte_stack_trace_wait_timeout); - orte_fwd_mpirun_port = false; - (void) mca_base_var_register ("orte", "orte", NULL, "fwd_mpirun_port", - "Forward the port used by mpirun so all daemons will use it", - MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, - OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY, - &orte_fwd_mpirun_port); - /* register the URI of the UNIVERSAL data server */ orte_data_server_uri = NULL; (void) mca_base_var_register ("orte", "pmix", NULL, "server_uri", diff --git a/orte/util/Makefile.am b/orte/util/Makefile.am index d54503b3bb..5e050c3bf2 100644 --- a/orte/util/Makefile.am +++ b/orte/util/Makefile.am @@ -11,7 +11,7 @@ # All rights reserved. # Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. # Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2014-2018 Intel, Inc. All rights reserved. +# Copyright (c) 2014-2019 Intel, Inc. All rights reserved. # Copyright (c) 2016 Research Organization for Information Science # and Technology (RIST). All rights reserved. 
# $COPYRIGHT$ @@ -58,8 +58,8 @@ headers += \ util/comm/comm.h \ util/attr.h \ util/listener.h \ - util/compress.h \ - util/threads.h + util/threads.h \ + util/nidmap.h lib@ORTE_LIB_PREFIX@open_rte_la_SOURCES += \ util/error_strings.c \ @@ -77,7 +77,7 @@ lib@ORTE_LIB_PREFIX@open_rte_la_SOURCES += \ util/comm/comm.c \ util/attr.c \ util/listener.c \ - util/compress.c + util/nidmap.c # Remove the generated man pages distclean-local: diff --git a/orte/util/compress.c b/orte/util/compress.c deleted file mode 100644 index d899f2d7f1..0000000000 --- a/orte/util/compress.c +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. - * Copyright (c) 2017 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include - - -#include -#ifdef HAVE_STRING_H -#include -#endif -#ifdef HAVE_ZLIB_H -#include -#endif - -#include "opal/util/output.h" -#include "compress.h" - -#if OPAL_HAVE_ZLIB -bool orte_util_compress_block(uint8_t *inbytes, - size_t inlen, - uint8_t **outbytes, - size_t *olen) -{ - z_stream strm; - size_t len; - uint8_t *tmp; - - if (inlen < ORTE_COMPRESS_LIMIT) { - return false; - } - - /* set default output */ - *outbytes = NULL; - *olen = 0; - - /* setup the stream */ - memset (&strm, 0, sizeof (strm)); - deflateInit (&strm, 9); - - /* get an upper bound on the required output storage */ - len = deflateBound(&strm, inlen); - if (NULL == (tmp = (uint8_t*)malloc(len))) { - return false; - } - strm.next_in = inbytes; - strm.avail_in = inlen; - - /* allocating the upper bound guarantees zlib will - * always successfully compress into the available space */ - strm.avail_out = len; - strm.next_out = tmp; - - deflate (&strm, Z_FINISH); - deflateEnd (&strm); - - *outbytes = tmp; - *olen = len - strm.avail_out; - return true; // we did the compression -} -#else -bool orte_util_compress_block(uint8_t *inbytes, - 
size_t inlen, - uint8_t **outbytes, - size_t *olen) -{ - return false; // we did not compress -} -#endif - -#if OPAL_HAVE_ZLIB -bool orte_util_uncompress_block(uint8_t **outbytes, size_t olen, - uint8_t *inbytes, size_t len) -{ - uint8_t *dest; - z_stream strm; - - /* set the default error answer */ - *outbytes = NULL; - - /* setting destination to the fully decompressed size */ - dest = (uint8_t*)malloc(olen); - if (NULL == dest) { - return false; - } - - memset (&strm, 0, sizeof (strm)); - if (Z_OK != inflateInit(&strm)) { - free(dest); - return false; - } - strm.avail_in = len; - strm.next_in = inbytes; - strm.avail_out = olen; - strm.next_out = dest; - - if (Z_STREAM_END != inflate (&strm, Z_FINISH)) { - opal_output(0, "\tDECOMPRESS FAILED: %s", strm.msg); - } - inflateEnd (&strm); - *outbytes = dest; - return true; -} -#else -bool orte_util_uncompress_block(uint8_t **outbytes, size_t olen, - uint8_t *inbytes, size_t len) -{ - return false; -} -#endif diff --git a/orte/util/compress.h b/orte/util/compress.h deleted file mode 100644 index 5ba3faf46e..0000000000 --- a/orte/util/compress.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/** - * @file - * - * Compress/decompress long data blocks - */ - -#ifndef ORTE_COMPRESS_H -#define ORTE_COMPRESS_H - -#include - - -BEGIN_C_DECLS - -/* define a limit for compression */ -#define ORTE_COMPRESS_LIMIT 4096 - -/** - * Compress a string into a byte object using Zlib - */ -ORTE_DECLSPEC bool orte_util_compress_block(uint8_t *inbytes, - size_t inlen, - uint8_t **outbytes, - size_t *olen); - -/** - * Decompress a byte object - */ -ORTE_DECLSPEC bool orte_util_uncompress_block(uint8_t **outbytes, size_t olen, - uint8_t *inbytes, size_t len); - -END_C_DECLS - -#endif /* ORTE_COMPRESS_H */ diff --git a/orte/util/nidmap.c b/orte/util/nidmap.c new file mode 100644 index 0000000000..04eaad570d --- /dev/null +++ b/orte/util/nidmap.c @@ -0,0 +1,793 @@ +/* + * Copyright (c) 2016-2019 Intel, Inc. All rights reserved. + * Copyright (c) 2018 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + */ + +#include "orte_config.h" +#include "orte/types.h" +#include "opal/types.h" + +#ifdef HAVE_UNISTD_H +#include +#endif +#include + +#include "opal/dss/dss_types.h" +#include "opal/mca/compress/compress.h" +#include "opal/util/argv.h" + +#include "orte/mca/errmgr/errmgr.h" +#include "orte/mca/rmaps/rmaps_types.h" +#include "orte/mca/routed/routed.h" +#include "orte/runtime/orte_globals.h" + +#include "orte/util/nidmap.h" +
+/*
+ * Pack one data block into the buffer, letting the active opal/compress
+ * component compress it if it chooses to.
+ *
+ * Items are packed in this order (the decoder in this file unpacks them
+ * in the same order):
+ *   1. OPAL_BOOL        - true if the block was compressed
+ *   2. OPAL_INT         - *width, only when width is non-NULL
+ *   3. OPAL_SIZE        - uncompressed length, only when compressed
+ *   4. OPAL_BYTE_OBJECT - the (possibly compressed) bytes
+ *
+ * The caller keeps ownership of "data". The compressed copy, if one was
+ * produced, is released here on every path.
+ *
+ * Returns ORTE_SUCCESS or the first error reported by opal_dss.pack.
+ */
+static int nidmap_pack_block(opal_buffer_t *buffer, uint8_t *data,
+                             size_t len, int *width)
+{
+    opal_byte_object_t bo, *boptr = &bo;
+    uint8_t *cmp = NULL;
+    size_t cmplen = 0;
+    bool compressed;
+    int rc;
+
+    if (opal_compress.compress_block(data, len, &cmp, &cmplen)) {
+        compressed = true;
+        bo.bytes = cmp;
+        bo.size = cmplen;
+    } else {
+        /* not compressed - send the caller's bytes as they are */
+        compressed = false;
+        bo.bytes = data;
+        bo.size = len;
+    }
+
+    rc = opal_dss.pack(buffer, &compressed, 1, OPAL_BOOL);
+    if (ORTE_SUCCESS == rc && NULL != width) {
+        rc = opal_dss.pack(buffer, width, 1, OPAL_INT);
+    }
+    if (ORTE_SUCCESS == rc && compressed) {
+        /* the receiver needs the original size to decompress */
+        rc = opal_dss.pack(buffer, &len, 1, OPAL_SIZE);
+    }
+    if (ORTE_SUCCESS == rc) {
+        rc = opal_dss.pack(buffer, &boptr, 1, OPAL_BYTE_OBJECT);
+    }
+
+    if (compressed) {
+        free(cmp);
+    }
+    return rc;
+}
+
+/*
+ * Encode the node pool into "buffer" for transmission to other daemons.
+ *
+ * The buffer receives, in order:
+ *   - OPAL_UINT8: 1 if the HNP is part of the allocation, else 0
+ *   - OPAL_UINT8: 1 if this is a managed allocation, else 0
+ *   - block: comma-separated node names (NUL-terminated string)
+ *   - block: daemon vpid per node, preceded by the #bytes used per vpid;
+ *            an all-ones value means "no daemon on this node"
+ *   - block: uint16_t slot count per node
+ *   - block: bitmap with one bit per node (MSB first), set when the node
+ *            has ORTE_NODE_FLAG_SLOTS_GIVEN
+ * where each "block" is laid out as described on nidmap_pack_block().
+ *
+ * Empty entries in the pool are skipped, and every per-node array is
+ * indexed by the position of the node among the non-empty entries so
+ * that all four blocks line up with each other.
+ *
+ * @param pool    pointer array of orte_node_t objects to encode
+ * @param buffer  buffer that the encoded map is packed into
+ * @return ORTE_SUCCESS, ORTE_ERR_OUT_OF_RESOURCE on allocation failure,
+ *         or the error returned by opal_dss.pack
+ */
+int orte_util_nidmap_create(opal_pointer_array_t *pool,
+                            opal_buffer_t *buffer)
+{
+    char *raw = NULL;
+    char **names = NULL;
+    uint8_t *vpids = NULL, *flags = NULL, u8;
+    uint16_t *slots = NULL, u16;
+    uint32_t u32;
+    int n, ndaemons, rc, nbytes, nbitmap;
+    size_t nalloc;
+    orte_node_t *nptr;
+
+    /* pack a flag indicating if the HNP was included in the allocation */
+    u8 = orte_hnp_is_allocated ? 1 : 0;
+    if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &u8, 1, OPAL_UINT8))) {
+        ORTE_ERROR_LOG(rc);
+        return rc;
+    }
+
+    /* pack a flag indicating if we are in a managed allocation */
+    u8 = orte_managed_allocation ? 1 : 0;
+    if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &u8, 1, OPAL_UINT8))) {
+        ORTE_ERROR_LOG(rc);
+        return rc;
+    }
+
+    /* daemon vpids start from 0 and increase linearly by one
+     * up to the number of nodes in the system, so pick the
+     * narrowest width that can hold them. The all-ones value of
+     * each width is reserved to mean "no daemon", so the largest
+     * vpid (pool->size - 1) must stay strictly below it */
+    if (UINT8_MAX >= pool->size) {
+        nbytes = 1;
+    } else if (UINT16_MAX >= pool->size) {
+        nbytes = 2;
+    } else {
+        nbytes = 4;
+    }
+
+    /* always allocate at least one element so that a NULL return
+     * unambiguously means we are out of memory */
+    nalloc = (0 < pool->size) ? (size_t)pool->size : 1;
+    /* the flags only need one bit/node */
+    nbitmap = (pool->size / 8) + 1;
+
+    vpids = (uint8_t*)calloc(nalloc, nbytes);
+    slots = (uint16_t*)calloc(nalloc, sizeof(uint16_t));
+    flags = (uint8_t*)calloc(1, nbitmap);
+    if (NULL == vpids || NULL == slots || NULL == flags) {
+        rc = ORTE_ERR_OUT_OF_RESOURCE;
+        ORTE_ERROR_LOG(rc);
+        goto cleanup;
+    }
+
+    ndaemons = 0;
+    for (n=0; n < pool->size; n++) {
+        if (NULL == (nptr = (orte_node_t*)opal_pointer_array_get_item(pool, n))) {
+            continue;
+        }
+        /* add the hostname to the argv */
+        opal_argv_append_nosize(&names, nptr->name);
+        /* store the vpid */
+        if (1 == nbytes) {
+            if (NULL == nptr->daemon) {
+                vpids[ndaemons] = UINT8_MAX;
+            } else {
+                vpids[ndaemons] = nptr->daemon->name.vpid;
+            }
+        } else if (2 == nbytes) {
+            if (NULL == nptr->daemon) {
+                u16 = UINT16_MAX;
+            } else {
+                u16 = nptr->daemon->name.vpid;
+            }
+            memcpy(&vpids[nbytes*ndaemons], &u16, 2);
+        } else {
+            if (NULL == nptr->daemon) {
+                u32 = UINT32_MAX;
+            } else {
+                u32 = nptr->daemon->name.vpid;
+            }
+            memcpy(&vpids[nbytes*ndaemons], &u32, 4);
+        }
+        /* store the number of slots - indexed like the names and
+         * vpids, since only ndaemons entries are transmitted */
+        slots[ndaemons] = nptr->slots;
+        /* store the flag.
+         * NOTE(review): the decoder in this file tests "1 == flags[n]",
+         * i.e. one byte per node, which does not match this
+         * one-bit-per-node layout - confirm which side is intended */
+        if (ORTE_FLAG_TEST(nptr, ORTE_NODE_FLAG_SLOTS_GIVEN)) {
+            flags[ndaemons/8] |= (1 << (7 - (ndaemons % 8)));
+        }
+        ++ndaemons;
+    }
+
+    /* construct the string of node names and pack it */
+    raw = opal_argv_join(names, ',');
+    if (NULL == raw) {
+        rc = ORTE_ERR_OUT_OF_RESOURCE;
+        ORTE_ERROR_LOG(rc);
+        goto cleanup;
+    }
+    rc = nidmap_pack_block(buffer, (uint8_t*)raw, strlen(raw)+1, NULL);
+    if (ORTE_SUCCESS != rc) {
+        ORTE_ERROR_LOG(rc);
+        goto cleanup;
+    }
+
+    /* pack the vpids, along with the #bytes/vpid */
+    rc = nidmap_pack_block(buffer, vpids,
+                           (size_t)nbytes * (size_t)ndaemons, &nbytes);
+    if (ORTE_SUCCESS != rc) {
+        ORTE_ERROR_LOG(rc);
+        goto cleanup;
+    }
+
+    /* pack the slots */
+    rc = nidmap_pack_block(buffer, (uint8_t*)slots,
+                           sizeof(uint16_t) * (size_t)ndaemons, NULL);
+    if (ORTE_SUCCESS != rc) {
+        ORTE_ERROR_LOG(rc);
+        goto cleanup;
+    }
+
+    /* pack the flags */
+    rc = nidmap_pack_block(buffer, flags, (size_t)nbitmap, NULL);
+    if (ORTE_SUCCESS != rc) {
+        ORTE_ERROR_LOG(rc);
+    }
+
+  cleanup:
+    if (NULL != names) {
+        opal_argv_free(names);
+    }
+    free(raw);
+    free(vpids);
+    free(slots);
+    free(flags);
+
+    return rc;
+}
+
+int orte_util_decode_nidmap(opal_buffer_t *buf) +{ + uint8_t u8, *vp8 = NULL, *flags = NULL; + uint16_t *vp16 = NULL, *slots = NULL; + uint32_t *vp32 = NULL, vpid; + int cnt, rc, nbytes, n; + bool compressed; + size_t sz; + opal_byte_object_t *boptr; + char *raw = NULL, **names = NULL; + orte_node_t *nd; + orte_job_t *daemons; + orte_proc_t *proc; + orte_topology_t *t; + + /* unpack the flag indicating if HNP is in allocation
*/ + cnt = 1; + if (OPAL_SUCCESS != (rc = opal_dss.unpack(buf, &u8, &cnt, OPAL_UINT8))) { + ORTE_ERROR_LOG(rc); + goto cleanup; + } + if (1 == u8) { + orte_hnp_is_allocated = true; + } else { + orte_hnp_is_allocated = false; + } + + /* unpack the flag indicating if we are in managed allocation */ + cnt = 1; + if (OPAL_SUCCESS != (rc = opal_dss.unpack(buf, &u8, &cnt, OPAL_UINT8))) { + ORTE_ERROR_LOG(rc); + goto cleanup; + } + if (1 == u8) { + orte_managed_allocation = true; + } else { + orte_managed_allocation = false; + } + + /* unpack compression flag for node names */ + cnt = 1; + if (OPAL_SUCCESS != (rc = opal_dss.unpack(buf, &compressed, &cnt, OPAL_BOOL))) { + ORTE_ERROR_LOG(rc); + goto cleanup; + } + + /* if compressed, get the uncompressed size */ + if (compressed) { + cnt = 1; + if (OPAL_SUCCESS != (rc = opal_dss.unpack(buf, &sz, &cnt, OPAL_SIZE))) { + ORTE_ERROR_LOG(rc); + goto cleanup; + } + } + + /* unpack the nodename object */ + cnt = 1; + if (OPAL_SUCCESS != (rc = opal_dss.unpack(buf, &boptr, &cnt, OPAL_BYTE_OBJECT))) { + ORTE_ERROR_LOG(rc); + goto cleanup; + } + + /* if compressed, decompress */ + if (compressed) { + if (!opal_compress.decompress_block((uint8_t**)&raw, sz, + boptr->bytes, boptr->size)) { + ORTE_ERROR_LOG(ORTE_ERROR); + if (NULL != boptr->bytes) { + free(boptr->bytes); + } + free(boptr); + rc = ORTE_ERROR; + goto cleanup; + } + } else { + raw = (char*)boptr->bytes; + boptr->bytes = NULL; + boptr->size = 0; + } + if (NULL != boptr->bytes) { + free(boptr->bytes); + } + free(boptr); + names = opal_argv_split(raw, ','); + free(raw); + + + /* unpack compression flag for daemon vpids */ + cnt = 1; + if (OPAL_SUCCESS != (rc = opal_dss.unpack(buf, &compressed, &cnt, OPAL_BOOL))) { + ORTE_ERROR_LOG(rc); + goto cleanup; + } + + /* unpack the #bytes/vpid */ + cnt = 1; + if (OPAL_SUCCESS != (rc = opal_dss.unpack(buf, &nbytes, &cnt, OPAL_INT))) { + ORTE_ERROR_LOG(rc); + goto cleanup; + } + + /* if compressed, get the uncompressed size */ + if 
(compressed) { + cnt = 1; + if (OPAL_SUCCESS != (rc = opal_dss.unpack(buf, &sz, &cnt, OPAL_SIZE))) { + ORTE_ERROR_LOG(rc); + goto cleanup; + } + } + + /* unpack the vpid object */ + cnt = 1; + if (OPAL_SUCCESS != (rc = opal_dss.unpack(buf, &boptr, &cnt, OPAL_BYTE_OBJECT))) { + ORTE_ERROR_LOG(rc); + goto cleanup; + } + + /* if compressed, decompress */ + if (compressed) { + if (!opal_compress.decompress_block((uint8_t**)&vp8, sz, + boptr->bytes, boptr->size)) { + ORTE_ERROR_LOG(ORTE_ERROR); + if (NULL != boptr->bytes) { + free(boptr->bytes); + } + free(boptr); + rc = ORTE_ERROR; + goto cleanup; + } + } else { + vp8 = (uint8_t*)boptr->bytes; + boptr->bytes = NULL; + boptr->size = 0; + } + if (NULL != boptr->bytes) { + free(boptr->bytes); + } + free(boptr); + if (2 == nbytes) { + vp16 = (uint16_t*)vp8; + vp8 = NULL; + } else if (4 == nbytes) { + vp32 = (uint32_t*)vp8; + vp8 = NULL; + } + + + /* unpack compression flag for slots */ + cnt = 1; + if (OPAL_SUCCESS != (rc = opal_dss.unpack(buf, &compressed, &cnt, OPAL_BOOL))) { + ORTE_ERROR_LOG(rc); + goto cleanup; + } + + /* if compressed, get the uncompressed size */ + if (compressed) { + cnt = 1; + if (OPAL_SUCCESS != (rc = opal_dss.unpack(buf, &sz, &cnt, OPAL_SIZE))) { + ORTE_ERROR_LOG(rc); + goto cleanup; + } + } + + /* unpack the slots object */ + cnt = 1; + if (OPAL_SUCCESS != (rc = opal_dss.unpack(buf, &boptr, &cnt, OPAL_BYTE_OBJECT))) { + ORTE_ERROR_LOG(rc); + goto cleanup; + } + + /* if compressed, decompress */ + if (compressed) { + if (!opal_compress.decompress_block((uint8_t**)&slots, sz, + boptr->bytes, boptr->size)) { + ORTE_ERROR_LOG(ORTE_ERROR); + if (NULL != boptr->bytes) { + free(boptr->bytes); + } + free(boptr); + rc = ORTE_ERROR; + goto cleanup; + } + } else { + slots = (uint16_t*)boptr->bytes; + boptr->bytes = NULL; + boptr->size = 0; + } + if (NULL != boptr->bytes) { + free(boptr->bytes); + } + free(boptr); + + + /* unpack compression flag for node flags */ + cnt = 1; + if (OPAL_SUCCESS != (rc = 
opal_dss.unpack(buf, &compressed, &cnt, OPAL_BOOL))) { + ORTE_ERROR_LOG(rc); + goto cleanup; + } + + /* if compressed, get the uncompressed size */ + if (compressed) { + cnt = 1; + if (OPAL_SUCCESS != (rc = opal_dss.unpack(buf, &sz, &cnt, OPAL_SIZE))) { + ORTE_ERROR_LOG(rc); + goto cleanup; + } + } + + /* unpack the node flags object */ + cnt = 1; + if (OPAL_SUCCESS != (rc = opal_dss.unpack(buf, &boptr, &cnt, OPAL_BYTE_OBJECT))) { + ORTE_ERROR_LOG(rc); + goto cleanup; + } + + /* if compressed, decompress */ + if (compressed) { + if (!opal_compress.decompress_block((uint8_t**)&flags, sz, + boptr->bytes, boptr->size)) { + ORTE_ERROR_LOG(ORTE_ERROR); + if (NULL != boptr->bytes) { + free(boptr->bytes); + } + free(boptr); + rc = ORTE_ERROR; + goto cleanup; + } + } else { + flags = (uint8_t*)boptr->bytes; + boptr->bytes = NULL; + boptr->size = 0; + } + if (NULL != boptr->bytes) { + free(boptr->bytes); + } + free(boptr); + + /* if we are the HNP, we don't need any of this stuff */ + if (ORTE_PROC_IS_HNP) { + goto cleanup; + } + + /* get the daemon job object */ + daemons = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid); + + /* get our topology */ + for (n=0; n < orte_node_topologies->size; n++) { + if (NULL != (t = (orte_topology_t*)opal_pointer_array_get_item(orte_node_topologies, n))) { + break; + } + } + + /* create the node pool array - this will include + * _all_ nodes known to the allocation */ + for (n=0; NULL != names[n]; n++) { + /* add this name to the pool */ + nd = OBJ_NEW(orte_node_t); + nd->name = names[n]; + opal_pointer_array_set_item(orte_node_pool, n, nd); + /* set the #slots */ + nd->slots = slots[n]; + /* set the flags */ + if (1 == flags[n]) { + ORTE_FLAG_SET(nd, ORTE_NODE_FLAG_SLOTS_GIVEN); + } + /* set the topology */ +#if !OPAL_ENABLE_HETEROGENEOUS_SUPPORT + nd->topology = t; +#endif + /* see if it has a daemon on it */ + if (1 == nbytes && UINT8_MAX != vp8[n]) { + vpid = vp8[n]; + } else if (2 == nbytes && UINT16_MAX != vp16[n]) { + vpid = 
vp16[n]; + } else if (4 == nbytes && UINT32_MAX != vp32[n]) { + vpid = vp32[n]; + } else { + vpid = UINT32_MAX; + } + if (UINT32_MAX != vpid && + NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(daemons->procs, vpid))) { + proc = OBJ_NEW(orte_proc_t); + proc->name.jobid = ORTE_PROC_MY_NAME->jobid; + proc->name.vpid = vpid; + proc->state = ORTE_PROC_STATE_RUNNING; + ORTE_FLAG_SET(proc, ORTE_PROC_FLAG_ALIVE); + daemons->num_procs++; + opal_pointer_array_set_item(daemons->procs, proc->name.vpid, proc); + } + nd->index = proc->name.vpid; + OBJ_RETAIN(nd); + proc->node = nd; + OBJ_RETAIN(proc); + nd->daemon = proc; + } + + /* update num procs */ + if (orte_process_info.num_procs != daemons->num_procs) { + orte_process_info.num_procs = daemons->num_procs; + /* need to update the routing plan */ + orte_routed.update_routing_plan(NULL); + } + + if (orte_process_info.max_procs < orte_process_info.num_procs) { + orte_process_info.max_procs = orte_process_info.num_procs; + } + + cleanup: + return rc; +} + +int orte_util_generate_ppn(orte_job_t *jdata, + opal_buffer_t *buf) +{ + uint16_t *ppn=NULL; + size_t nbytes; + int rc = ORTE_SUCCESS; + orte_app_idx_t i; + int j, k; + opal_byte_object_t bo, *boptr; + bool compressed; + orte_node_t *nptr; + orte_proc_t *proc; + size_t sz; + + /* make room for the number of procs on each node */ + nbytes = sizeof(uint16_t) * orte_node_pool->size; + ppn = (uint16_t*)malloc(nbytes); + + for (i=0; i < jdata->num_apps; i++) { + /* reset the #procs */ + memset(ppn, 0, nbytes); + /* for each app_context, compute the #procs on + * each node of the allocation */ + for (j=0; j < orte_node_pool->size; j++) { + if (NULL == (nptr = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, j))) { + continue; + } + if (NULL == nptr->daemon) { + continue; + } + for (k=0; k < nptr->procs->size; k++) { + if (NULL != (proc = (orte_proc_t*)opal_pointer_array_get_item(nptr->procs, k))) { + if (proc->name.jobid == jdata->jobid) { + ++ppn[j]; + } + } + 
} + } + if (opal_compress.compress_block((uint8_t*)ppn, nbytes, + (uint8_t**)&bo.bytes, &sz)) { + /* mark that this was compressed */ + compressed = true; + bo.size = sz; + } else { + /* mark that this was not compressed */ + compressed = false; + bo.bytes = (uint8_t*)ppn; + bo.size = nbytes; + } + /* indicate compression */ + if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &compressed, 1, OPAL_BOOL))) { + if (compressed) { + free(bo.bytes); + } + goto cleanup; + } + /* if compressed, provide the uncompressed size */ + if (compressed) { + sz = nbytes; + if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &sz, 1, OPAL_SIZE))) { + free(bo.bytes); + goto cleanup; + } + } + /* add the object */ + boptr = &bo; + rc = opal_dss.pack(buf, &boptr, 1, OPAL_BYTE_OBJECT); + if (OPAL_SUCCESS != rc) { + break; + } + } + + cleanup: + free(ppn); + return rc; +} + +int orte_util_decode_ppn(orte_job_t *jdata, + opal_buffer_t *buf) +{ + orte_app_idx_t n; + int m, cnt, rc; + opal_byte_object_t *boptr; + bool compressed; + size_t sz; + uint16_t *ppn, k; + orte_node_t *node; + orte_proc_t *proc; + + for (n=0; n < jdata->num_apps; n++) { + /* unpack the compression flag */ + cnt = 1; + if (OPAL_SUCCESS != (rc = opal_dss.unpack(buf, &compressed, &cnt, OPAL_BOOL))) { + ORTE_ERROR_LOG(rc); + return rc; + } + /* if compressed, unpack the raw size */ + if (compressed) { + cnt = 1; + if (OPAL_SUCCESS != (rc = opal_dss.unpack(buf, &sz, &cnt, OPAL_SIZE))) { + ORTE_ERROR_LOG(rc); + return rc; + } + } + /* unpack the byte object describing this app */ + cnt = 1; + if (OPAL_SUCCESS != (rc = opal_dss.unpack(buf, &boptr, &cnt, OPAL_BYTE_OBJECT))) { + ORTE_ERROR_LOG(rc); + return rc; + } + + if (ORTE_PROC_IS_HNP) { + /* just discard it */ + free(boptr->bytes); + free(boptr); + continue; + } + + /* decompress if required */ + if (compressed) { + if (!opal_compress.decompress_block((uint8_t**)&ppn, sz, + boptr->bytes, boptr->size)) { + ORTE_ERROR_LOG(ORTE_ERROR); + OBJ_RELEASE(boptr); + return ORTE_ERROR; + } + } 
else { + ppn = (uint16_t*)boptr->bytes; + boptr->bytes = NULL; + boptr->size = 0; + } + if (NULL != boptr->bytes) { + free(boptr->bytes); + } + free(boptr); + + /* cycle thru the node pool */ + for (m=0; m < orte_node_pool->size; m++) { + if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, m))) { + continue; + } + if (0 < ppn[m]) { + if (!ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_MAPPED)) { + OBJ_RETAIN(node); + ORTE_FLAG_SET(node, ORTE_NODE_FLAG_MAPPED); + opal_pointer_array_add(jdata->map->nodes, node); + } + /* create a proc object for each one */ + for (k=0; k < ppn[m]; k++) { + proc = OBJ_NEW(orte_proc_t); + proc->name.jobid = jdata->jobid; + /* leave the vpid undefined as this will be determined + * later when we do the overall ranking */ + proc->app_idx = n; + proc->parent = node->daemon->name.vpid; + OBJ_RETAIN(node); + proc->node = node; + /* flag the proc as ready for launch */ + proc->state = ORTE_PROC_STATE_INIT; + opal_pointer_array_add(node->procs, proc); + /* we will add the proc to the jdata array when we + * compute its rank */ + } + node->num_procs += ppn[m]; + } + } + free(ppn); + } + + return ORTE_SUCCESS; +} diff --git a/orte/util/nidmap.h b/orte/util/nidmap.h new file mode 100644 index 0000000000..ac935f58cc --- /dev/null +++ b/orte/util/nidmap.h @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2006-2013 Los Alamos National Security, LLC. + * All rights reserved. + * Copyright (c) 2010-2011 Cisco Systems, Inc. All rights reserved. 
+ * Copyright (c) 2015-2019 Intel, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef ORTE_NIDMAP_H +#define ORTE_NIDMAP_H + +#include "orte_config.h" + +#include "opal/class/opal_pointer_array.h" +#include "opal/dss/dss_types.h" +#include "orte/runtime/orte_globals.h" + +ORTE_DECLSPEC int orte_util_nidmap_create(opal_pointer_array_t *pool, + opal_buffer_t *buf); + +ORTE_DECLSPEC int orte_util_decode_nidmap(opal_buffer_t *buf); + +ORTE_DECLSPEC int orte_util_generate_ppn(orte_job_t *jdata, + opal_buffer_t *buf); + +ORTE_DECLSPEC int orte_util_decode_ppn(orte_job_t *jdata, + opal_buffer_t *buf); + +#endif /* ORTE_NIDMAP_H */ From 88ac05fca6fe9e908cf7abfbcf4131c0020b6427 Mon Sep 17 00:00:00 2001 From: Gilles Gouaillardet Date: Tue, 5 Feb 2019 14:10:53 +0900 Subject: [PATCH 2/8] misc fixes Signed-off-by: Gilles Gouaillardet --- opal/runtime/opal_init.c | 2 -- orte/mca/ess/base/ess_base_std_orted.c | 1 + 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/opal/runtime/opal_init.c b/opal/runtime/opal_init.c index 1c957ef23a..2db5b44116 100644 --- a/opal/runtime/opal_init.c +++ b/opal/runtime/opal_init.c @@ -589,7 +589,5 @@ opal_init(int* pargc, char*** pargv) return opal_init_error ("opal_compress_base_select", ret); } - opal_finalize_pop_domain (); - return OPAL_SUCCESS; } diff --git a/orte/mca/ess/base/ess_base_std_orted.c b/orte/mca/ess/base/ess_base_std_orted.c index 172ac7212e..365fc871e5 100644 --- a/orte/mca/ess/base/ess_base_std_orted.c +++ b/orte/mca/ess/base/ess_base_std_orted.c @@ -621,6 +621,7 @@ int orte_ess_base_orted_finalize(void) (void) mca_base_framework_close(&orte_plm_base_framework); /* make sure our local procs are dead */ orte_odls.kill_local_procs(NULL); + (void) mca_base_framework_close(&orte_rmaps_base_framework); (void) mca_base_framework_close(&orte_rtc_base_framework); (void) mca_base_framework_close(&orte_odls_base_framework); (void) 
mca_base_framework_close(&orte_routed_base_framework); From 01e9aca40fe091750654d02473d8c861950ce9d9 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Wed, 6 Feb 2019 10:35:24 -0800 Subject: [PATCH 3/8] Add topology support for hetero systems Signed-off-by: Ralph Castain --- orte/mca/odls/odls_types.h | 5 +- orte/mca/plm/base/plm_base_launch_support.c | 78 ++- orte/orted/orted_comm.c | 29 +- orte/util/nidmap.c | 599 ++++++++++++++++---- orte/util/nidmap.h | 9 + 5 files changed, 621 insertions(+), 99 deletions(-) diff --git a/orte/mca/odls/odls_types.h b/orte/mca/odls/odls_types.h index 539f9a6ef5..aabbb34b3b 100644 --- a/orte/mca/odls/odls_types.h +++ b/orte/mca/odls/odls_types.h @@ -12,7 +12,7 @@ * Copyright (c) 2011-2016 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011-2012 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * Copyright (c) 2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -93,6 +93,9 @@ typedef uint8_t orte_daemon_cmd_flag_t; /* tell DVM daemons to cleanup resources from job */ #define ORTE_DAEMON_DVM_CLEANUP_JOB_CMD (orte_daemon_cmd_flag_t) 34 +/* pass node info */ +#define ORTE_DAEMON_PASS_NODE_INFO_CMD (orte_daemon_cmd_flag_t) 35 + /* * Struct written up the pipe from the child to the parent. 
*/ diff --git a/orte/mca/plm/base/plm_base_launch_support.c b/orte/mca/plm/base/plm_base_launch_support.c index 363c006233..57f609bfc1 100644 --- a/orte/mca/plm/base/plm_base_launch_support.c +++ b/orte/mca/plm/base/plm_base_launch_support.c @@ -130,7 +130,11 @@ void orte_plm_base_daemons_reported(int fd, short args, void *cbdata) orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata; orte_topology_t *t; orte_node_t *node; - int i; + int i, rc; + uint8_t u8; + opal_buffer_t buf; + orte_grpcomm_signature_t *sig; + orte_daemon_cmd_flag_t command = ORTE_DAEMON_PASS_NODE_INFO_CMD; ORTE_ACQUIRE_OBJECT(caddy); @@ -177,6 +181,78 @@ void orte_plm_base_daemons_reported(int fd, short args, void *cbdata) /* ensure we update the routing plan */ orte_routed.update_routing_plan(NULL); + /* prep the buffer */ + OBJ_CONSTRUCT(&buf, opal_buffer_t); + /* load the command */ + if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &command, 1, ORTE_DAEMON_CMD))) { + ORTE_ERROR_LOG(rc); + OBJ_DESTRUCT(&buf); + ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE); + OBJ_RELEASE(caddy); + return; + } + + + /* if we did not execute a tree-spawn, then the daemons do + * not currently have a nidmap for the job - in that case, + * send one to them */ + if (!orte_nidmap_communicated) { + u8 = 1; + if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &u8, 1, OPAL_UINT8))) { + ORTE_ERROR_LOG(rc); + OBJ_DESTRUCT(&buf); + ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE); + OBJ_RELEASE(caddy); + return; + } + if (OPAL_SUCCESS != (rc = orte_util_nidmap_create(orte_node_pool, &buf))) { + ORTE_ERROR_LOG(rc); + OBJ_DESTRUCT(&buf); + ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE); + OBJ_RELEASE(caddy); + return; + } + orte_nidmap_communicated = true; + } else { + u8 = 0; + if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &u8, 1, OPAL_UINT8))) { + ORTE_ERROR_LOG(rc); + OBJ_DESTRUCT(&buf); + ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE); + OBJ_RELEASE(caddy); + return; + } + } + + /* we always send the 
topologies and the #slots on each node. Note + * that we cannot send the #slots until after the above step since, + * for unmanaged allocations, we might have just determined it! */ + if (OPAL_SUCCESS != (rc = orte_util_pass_node_info(&buf))) { + ORTE_ERROR_LOG(rc); + OBJ_DESTRUCT(&buf); + ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE); + OBJ_RELEASE(caddy); + return; + } + + /* goes to all daemons */ + sig = OBJ_NEW(orte_grpcomm_signature_t); + sig->signature = (orte_process_name_t*)malloc(sizeof(orte_process_name_t)); + sig->signature[0].jobid = ORTE_PROC_MY_NAME->jobid; + sig->signature[0].vpid = ORTE_VPID_WILDCARD; + sig->sz = 1; + if (ORTE_SUCCESS != (rc = orte_grpcomm.xcast(sig, ORTE_RML_TAG_DAEMON, &buf))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(sig); + OBJ_DESTRUCT(&buf); + ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE); + OBJ_RELEASE(caddy); + return; + } + OBJ_DESTRUCT(&buf); + /* maintain accounting */ + OBJ_RELEASE(sig); + /* progress the job */ caddy->jdata->state = ORTE_JOB_STATE_DAEMONS_REPORTED; ORTE_ACTIVATE_JOB_STATE(caddy->jdata, ORTE_JOB_STATE_VM_READY); diff --git a/orte/orted/orted_comm.c b/orte/orted/orted_comm.c index 6cd1d5b15a..b07e86e6ab 100644 --- a/orte/orted/orted_comm.c +++ b/orte/orted/orted_comm.c @@ -59,6 +59,7 @@ #include "orte/util/proc_info.h" #include "orte/util/session_dir.h" #include "orte/util/name_fns.h" +#include "orte/util/nidmap.h" #include "orte/mca/errmgr/errmgr.h" #include "orte/mca/grpcomm/base/base.h" @@ -126,7 +127,7 @@ void orte_daemon_recv(int status, orte_process_name_t* sender, char *coprocessors; orte_job_map_t *map; int8_t flag; - uint8_t *cmpdata; + uint8_t *cmpdata, u8; size_t cmplen; /* unpack the command */ @@ -241,6 +242,32 @@ void orte_daemon_recv(int status, orte_process_name_t* sender, } break; + + case ORTE_DAEMON_PASS_NODE_INFO_CMD: + if (orte_debug_daemons_flag) { + opal_output(0, "%s orted_cmd: received pass_node_info", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); + } + if (!ORTE_PROC_IS_HNP) { 
+ n = 1; + if (ORTE_SUCCESS != (ret = opal_dss.unpack(buffer, &u8, &n, OPAL_UINT8))) { + ORTE_ERROR_LOG(ret); + goto CLEANUP; + } + if (1 == u8) { + if (ORTE_SUCCESS != (ret = orte_util_decode_nidmap(buffer))) { + ORTE_ERROR_LOG(ret); + goto CLEANUP; + } + } + if (ORTE_SUCCESS != (ret = orte_util_parse_node_info(buffer))) { + ORTE_ERROR_LOG(ret); + goto CLEANUP; + } + } + break; + + /**** ADD_LOCAL_PROCS ****/ case ORTE_DAEMON_ADD_LOCAL_PROCS: case ORTE_DAEMON_DVM_ADD_PROCS: diff --git a/orte/util/nidmap.c b/orte/util/nidmap.c index 04eaad570d..73dfe518f6 100644 --- a/orte/util/nidmap.c +++ b/orte/util/nidmap.c @@ -24,7 +24,7 @@ #include "opal/util/argv.h" #include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/rmaps/rmaps_types.h" +#include "orte/mca/rmaps/base/base.h" #include "orte/mca/routed/routed.h" #include "orte/runtime/orte_globals.h" @@ -34,11 +34,10 @@ int orte_util_nidmap_create(opal_pointer_array_t *pool, opal_buffer_t *buffer) { char *raw = NULL; - uint8_t *vpids=NULL, *flags=NULL, u8; + uint8_t *vpids=NULL, u8; uint16_t u16; - uint16_t *slots=NULL; uint32_t u32; - int n, ndaemons, rc, nbytes, nbitmap; + int n, ndaemons, rc, nbytes; bool compressed; char **names = NULL, **ranks = NULL; orte_node_t *nptr; @@ -84,13 +83,6 @@ int orte_util_nidmap_create(opal_pointer_array_t *pool, } vpids = (uint8_t*)malloc(nbytes * pool->size); - /* make room for the number of slots on each node */ - slots = (uint16_t*)malloc(sizeof(uint16_t) * pool->size); - - /* and for the flags for each node - only need one bit/node */ - nbitmap = (pool->size / 8) + 1; - flags = (uint8_t*)calloc(1, nbitmap); - ndaemons = 0; for (n=0; n < pool->size; n++) { if (NULL == (nptr = (orte_node_t*)opal_pointer_array_get_item(pool, n))) { @@ -120,12 +112,6 @@ int orte_util_nidmap_create(opal_pointer_array_t *pool, } memcpy(&vpids[nbytes*ndaemons], &u32, 4); } - /* store the number of slots */ - slots[n] = nptr->slots; - /* store the flag */ - if (ORTE_FLAG_TEST(nptr, 
ORTE_NODE_FLAG_SLOTS_GIVEN)) { - flags[n/8] |= (1 << (7 - (n % 8))); - } ++ndaemons; } @@ -215,76 +201,6 @@ int orte_util_nidmap_create(opal_pointer_array_t *pool, free(bo.bytes); } - /* compress the slots */ - if (opal_compress.compress_block((uint8_t*)slots, sizeof(uint16_t)*ndaemons, - (uint8_t**)&bo.bytes, &sz)) { - /* mark that this was compressed */ - compressed = true; - bo.size = sz; - } else { - /* mark that this was not compressed */ - compressed = false; - bo.bytes = (uint8_t*)slots; - bo.size = sizeof(uint16_t)*ndaemons; - } - /* indicate compression */ - if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &compressed, 1, OPAL_BOOL))) { - if (compressed) { - free(bo.bytes); - } - goto cleanup; - } - /* if compressed, provide the uncompressed size */ - if (compressed) { - sz = sizeof(uint16_t)*ndaemons; - if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &sz, 1, OPAL_SIZE))) { - free(bo.bytes); - goto cleanup; - } - } - /* add the object */ - boptr = &bo; - if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &boptr, 1, OPAL_BYTE_OBJECT))) { - if (compressed) { - free(bo.bytes); - } - goto cleanup; - } - if (compressed) { - free(bo.bytes); - } - - /* compress the flags */ - if (opal_compress.compress_block(flags, nbitmap, - (uint8_t**)&bo.bytes, &sz)) { - /* mark that this was compressed */ - compressed = true; - bo.size = sz; - } else { - /* mark that this was not compressed */ - compressed = false; - bo.bytes = flags; - bo.size = nbitmap; - } - /* indicate compression */ - if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &compressed, 1, OPAL_BOOL))) { - if (compressed) { - free(bo.bytes); - } - goto cleanup; - } - /* if compressed, provide the uncompressed size */ - if (compressed) { - sz = nbitmap; - if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &sz, 1, OPAL_SIZE))) { - free(bo.bytes); - goto cleanup; - } - } - /* add the object */ - boptr = &bo; - rc = opal_dss.pack(buffer, &boptr, 1, OPAL_BYTE_OBJECT); - cleanup: if (NULL != names) { opal_argv_free(names); @@ 
-298,12 +214,6 @@ int orte_util_nidmap_create(opal_pointer_array_t *pool, if (NULL != vpids) { free(vpids); } - if (NULL != slots) { - free(slots); - } - if (NULL != flags) { - free(flags); - } return rc; } @@ -574,10 +484,9 @@ int orte_util_decode_nidmap(opal_buffer_t *buf) if (1 == flags[n]) { ORTE_FLAG_SET(nd, ORTE_NODE_FLAG_SLOTS_GIVEN); } - /* set the topology */ -#if !OPAL_ENABLE_HETEROGENEOUS_SUPPORT + /* set the topology - always default to homogeneous + * as that is the most common scenario */ nd->topology = t; -#endif /* see if it has a daemon on it */ if (1 == nbytes && UINT8_MAX != vp8[n]) { vpid = vp8[n]; @@ -620,6 +529,504 @@ int orte_util_decode_nidmap(opal_buffer_t *buf) return rc; } +typedef struct { + opal_list_item_t super; + orte_topology_t *t; +} orte_tptr_trk_t; +static OBJ_CLASS_INSTANCE(orte_tptr_trk_t, + opal_list_item_t, + NULL, NULL); + +int orte_util_pass_node_info(opal_buffer_t *buffer) +{ + uint16_t *slots=NULL, slot = UINT16_MAX; + uint8_t *flags=NULL, flag = UINT8_MAX, *topologies = NULL; + int8_t i8, ntopos; + int rc, n, nbitmap, nstart; + bool compressed, unislots = true, uniflags = true, unitopos = true; + orte_node_t *nptr; + opal_byte_object_t bo, *boptr; + size_t sz, nslots; + opal_buffer_t bucket; + orte_tptr_trk_t *trk; + opal_list_t topos; + orte_topology_t *t; + + /* make room for the number of slots on each node */ + nslots = sizeof(uint16_t) * orte_node_pool->size; + slots = (uint16_t*)malloc(nslots); + /* and for the flags for each node - only need one bit/node */ + nbitmap = (orte_node_pool->size / 8) + 1; + flags = (uint8_t*)calloc(1, nbitmap); + + /* handle the topologies - as the most common case by far + * is to have homogeneous topologies, we only send them + * if something is different. We know that the HNP is + * the first topology, and that any differing topology + * on the compute nodes must follow. 
So send the topologies + * if and only if: + * + * (a) the HNP is being used to house application procs and + * there is more than one topology in our array; or + * + * (b) the HNP is not being used, but there are more than + * two topologies in our array, thus indicating that + * there are multiple topologies on the compute nodes + */ + if (!orte_hnp_is_allocated || (ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping) & ORTE_MAPPING_NO_USE_LOCAL)) { + nstart = 1; + } else { + nstart = 0; + } + OBJ_CONSTRUCT(&topos, opal_list_t); + OBJ_CONSTRUCT(&bucket, opal_buffer_t); + for (n=nstart; n < orte_node_topologies->size; n++) { + if (NULL == (t = (orte_topology_t*)opal_pointer_array_get_item(orte_node_topologies, n))) { + continue; + } + trk = OBJ_NEW(orte_tptr_trk_t); + trk->t = t; + opal_list_append(&topos, &trk->super); + /* pack this topology string */ + if (ORTE_SUCCESS != (rc = opal_dss.pack(&bucket, &t->sig, 1, OPAL_STRING))) { + ORTE_ERROR_LOG(rc); + OBJ_DESTRUCT(&bucket); + goto cleanup; + } + /* pack the topology itself */ + if (ORTE_SUCCESS != (rc = opal_dss.pack(&bucket, &t->topo, 1, OPAL_HWLOC_TOPO))) { + ORTE_ERROR_LOG(rc); + OBJ_DESTRUCT(&bucket); + goto cleanup; + } + } + /* pack the number of topologies in allocation */ + ntopos = opal_list_get_size(&topos); + if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &ntopos, 1, OPAL_INT8))) { + goto cleanup; + } + if (1 < ntopos) { + /* need to send them along */ + opal_dss.copy_payload(buffer, &bucket); + /* allocate space to report them */ + ntopos = orte_node_pool->size; + topologies = (uint8_t*)malloc(ntopos); + unitopos = false; + } + OBJ_DESTRUCT(&bucket); + + for (n=0; n < orte_node_pool->size; n++) { + if (NULL == (nptr = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, n))) { + continue; + } + /* store the topology, if required */ + if (!unitopos) { + topologies[n] = 0; + if (0 == nstart || 0 < n) { + OPAL_LIST_FOREACH(trk, &topos, orte_tptr_trk_t) { + if (trk->t == nptr->topology) { + break; 
+ } + topologies[n]++; + } + } + } + /* store the number of slots */ + slots[n] = nptr->slots; + if (UINT16_MAX == slot) { + slot = nptr->slots; + } else if (slot != nptr->slots) { + unislots = false; + } + /* store the flag */ + if (ORTE_FLAG_TEST(nptr, ORTE_NODE_FLAG_SLOTS_GIVEN)) { + flags[n/8] |= (1 << (7 - (n % 8))); + if (UINT8_MAX == flag) { + flag = 1; + } else if (1 != flag) { + uniflags = false; + } + } else { + if (UINT8_MAX == flag) { + flag = 0; + } else if (0 != flag) { + uniflags = false; + } + } + } + + /* deal with the topology assignments */ + if (!unitopos) { + if (opal_compress.compress_block((uint8_t*)topologies, ntopos, + (uint8_t**)&bo.bytes, &sz)) { + /* mark that this was compressed */ + i8 = 1; + compressed = true; + bo.size = sz; + } else { + /* mark that this was not compressed */ + i8 = 0; + compressed = false; + bo.bytes = topologies; + bo.size = nbitmap; + } + /* indicate compression */ + if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &i8, 1, OPAL_INT8))) { + if (compressed) { + free(bo.bytes); + } + goto cleanup; + } + /* if compressed, provide the uncompressed size */ + if (compressed) { + sz = nslots; + if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &sz, 1, OPAL_SIZE))) { + free(bo.bytes); + goto cleanup; + } + } + /* add the object */ + boptr = &bo; + rc = opal_dss.pack(buffer, &boptr, 1, OPAL_BYTE_OBJECT); + } + if (compressed) { + free(bo.bytes); + } + + /* if we have uniform #slots, then just flag it - no + * need to pass anything */ + if (unislots) { + i8 = -1 * slot; + if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &i8, 1, OPAL_INT8))) { + goto cleanup; + } + } else { + if (opal_compress.compress_block((uint8_t*)slots, nslots, + (uint8_t**)&bo.bytes, &sz)) { + /* mark that this was compressed */ + i8 = 1; + compressed = true; + bo.size = sz; + } else { + /* mark that this was not compressed */ + i8 = 0; + compressed = false; + bo.bytes = flags; + bo.size = nbitmap; + } + /* indicate compression */ + if (ORTE_SUCCESS != (rc 
= opal_dss.pack(buffer, &i8, 1, OPAL_INT8))) { + if (compressed) { + free(bo.bytes); + } + goto cleanup; + } + /* if compressed, provide the uncompressed size */ + if (compressed) { + sz = nslots; + if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &sz, 1, OPAL_SIZE))) { + free(bo.bytes); + goto cleanup; + } + } + /* add the object */ + boptr = &bo; + rc = opal_dss.pack(buffer, &boptr, 1, OPAL_BYTE_OBJECT); + } + if (compressed) { + free(bo.bytes); + } + + /* if we have uniform flags, then just flag it - no + * need to pass anything */ + if (uniflags) { + if (1 == flag) { + i8 = -1; + } else { + i8 = -2; + } + if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &i8, 1, OPAL_INT8))) { + goto cleanup; + } + } else { + if (opal_compress.compress_block(flags, nbitmap, + (uint8_t**)&bo.bytes, &sz)) { + /* mark that this was compressed */ + i8 = 2; + compressed = true; + bo.size = sz; + } else { + /* mark that this was not compressed */ + i8 = 3; + compressed = false; + bo.bytes = flags; + bo.size = nbitmap; + } + /* indicate compression */ + if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &i8, 1, OPAL_INT8))) { + if (compressed) { + free(bo.bytes); + } + goto cleanup; + } + /* if compressed, provide the uncompressed size */ + if (compressed) { + sz = nbitmap; + if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &sz, 1, OPAL_SIZE))) { + free(bo.bytes); + goto cleanup; + } + } + /* add the object */ + boptr = &bo; + rc = opal_dss.pack(buffer, &boptr, 1, OPAL_BYTE_OBJECT); + } + if (compressed) { + free(bo.bytes); + } + + cleanup: + if (NULL != slots) { + free(slots); + } + if (NULL != flags) { + free(flags); + } + return rc; +} + +int orte_util_parse_node_info(opal_buffer_t *buf) +{ + int8_t i8; + int rc = ORTE_SUCCESS, cnt, n, m; + orte_node_t *nptr; + size_t sz; + opal_byte_object_t *boptr; + uint16_t *slots = NULL; + uint8_t *flags = NULL; + uint8_t *topologies = NULL; + orte_topology_t *t2, **tps = NULL; + hwloc_topology_t topo; + char *sig; + + /* check to see if we have 
uniform topologies */ + cnt = 1; + if (OPAL_SUCCESS != (rc = opal_dss.unpack(buf, &i8, &cnt, OPAL_INT8))) { + ORTE_ERROR_LOG(rc); + goto cleanup; + } + /* we already defaulted to uniform topology, so only need to + * process this if it is non-uniform */ + if (1 < i8) { + /* create an array to cache these */ + tps = (orte_topology_t**)malloc(sizeof(orte_topology_t*)); + for (n=0; n < i8; n++) { + cnt = 1; + if (ORTE_SUCCESS != (rc = opal_dss.unpack(buf, &sig, &cnt, OPAL_STRING))) { + ORTE_ERROR_LOG(rc); + goto cleanup; + } + cnt = 1; + if (ORTE_SUCCESS != (rc = opal_dss.unpack(buf, &topo, &cnt, OPAL_HWLOC_TOPO))) { + ORTE_ERROR_LOG(rc); + goto cleanup; + } + /* new topology - record it */ + t2 = OBJ_NEW(orte_topology_t); + t2->sig = sig; + t2->topo = topo; + opal_pointer_array_add(orte_node_topologies, t2); + /* keep a cached copy */ + tps[n] = t2; + } + /* now get the array of assigned topologies */ + /* if compressed, get the uncompressed size */ + cnt = 1; + if (OPAL_SUCCESS != (rc = opal_dss.unpack(buf, &sz, &cnt, OPAL_SIZE))) { + ORTE_ERROR_LOG(rc); + goto cleanup; + } + /* unpack the topologies object */ + cnt = 1; + if (OPAL_SUCCESS != (rc = opal_dss.unpack(buf, &boptr, &cnt, OPAL_BYTE_OBJECT))) { + ORTE_ERROR_LOG(rc); + goto cleanup; + } + /* if compressed, decompress */ + if (1 == i8) { + if (!opal_compress.decompress_block((uint8_t**)&topologies, sz, + boptr->bytes, boptr->size)) { + ORTE_ERROR_LOG(ORTE_ERROR); + if (NULL != boptr->bytes) { + free(boptr->bytes); + } + free(boptr); + rc = ORTE_ERROR; + goto cleanup; + } + } else { + topologies = (uint8_t*)boptr->bytes; + boptr->bytes = NULL; + boptr->size = 0; + } + if (NULL != boptr->bytes) { + free(boptr->bytes); + } + free(boptr); + /* cycle across the node pool and assign the values */ + for (n=0, m=0; n < orte_node_pool->size; n++) { + if (NULL != (nptr = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, n))) { + nptr->topology = tps[topologies[m]]; + ++m; + } + } + } + + /* check to see if we 
have uniform slot assignments */ + cnt = 1; + if (OPAL_SUCCESS != (rc = opal_dss.unpack(buf, &i8, &cnt, OPAL_INT8))) { + ORTE_ERROR_LOG(rc); + goto cleanup; + } + + /* if so, then make every node the same */ + if (0 > i8) { + i8 = -1 * i8; + for (n=0; n < orte_node_pool->size; n++) { + if (NULL != (nptr = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, n))) { + nptr->slots = i8; + } + } + } else { + /* if compressed, get the uncompressed size */ + if (1 == i8) { + cnt = 1; + if (OPAL_SUCCESS != (rc = opal_dss.unpack(buf, &sz, &cnt, OPAL_SIZE))) { + ORTE_ERROR_LOG(rc); + goto cleanup; + } + } + /* unpack the slots object */ + cnt = 1; + if (OPAL_SUCCESS != (rc = opal_dss.unpack(buf, &boptr, &cnt, OPAL_BYTE_OBJECT))) { + ORTE_ERROR_LOG(rc); + goto cleanup; + } + /* if compressed, decompress */ + if (1 == i8) { + if (!opal_compress.decompress_block((uint8_t**)&slots, sz, + boptr->bytes, boptr->size)) { + ORTE_ERROR_LOG(ORTE_ERROR); + if (NULL != boptr->bytes) { + free(boptr->bytes); + } + free(boptr); + rc = ORTE_ERROR; + goto cleanup; + } + } else { + slots = (uint16_t*)boptr->bytes; + boptr->bytes = NULL; + boptr->size = 0; + } + if (NULL != boptr->bytes) { + free(boptr->bytes); + } + free(boptr); + /* cycle across the node pool and assign the values */ + for (n=0, m=0; n < orte_node_pool->size; n++) { + if (NULL != (nptr = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, n))) { + nptr->slots = slots[m]; + ++m; + } + } + } + + /* check to see if we have uniform flag assignments */ + cnt = 1; + if (OPAL_SUCCESS != (rc = opal_dss.unpack(buf, &i8, &cnt, OPAL_INT8))) { + ORTE_ERROR_LOG(rc); + goto cleanup; + } + + /* if so, then make every node the same */ + if (0 > i8) { + i8 += 2; + for (n=0; n < orte_node_pool->size; n++) { + if (NULL != (nptr = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, n))) { + if (i8) { + ORTE_FLAG_SET(nptr, ORTE_NODE_FLAG_SLOTS_GIVEN); + } else { + ORTE_FLAG_UNSET(nptr, ORTE_NODE_FLAG_SLOTS_GIVEN); + } + } + } + 
} else { + /* if compressed, get the uncompressed size */ + if (1 == i8) { + cnt = 1; + if (OPAL_SUCCESS != (rc = opal_dss.unpack(buf, &sz, &cnt, OPAL_SIZE))) { + ORTE_ERROR_LOG(rc); + goto cleanup; + } + } + /* unpack the slots object */ + cnt = 1; + if (OPAL_SUCCESS != (rc = opal_dss.unpack(buf, &boptr, &cnt, OPAL_BYTE_OBJECT))) { + ORTE_ERROR_LOG(rc); + goto cleanup; + } + /* if compressed, decompress */ + if (1 == i8) { + if (!opal_compress.decompress_block((uint8_t**)&flags, sz, + boptr->bytes, boptr->size)) { + ORTE_ERROR_LOG(ORTE_ERROR); + if (NULL != boptr->bytes) { + free(boptr->bytes); + } + free(boptr); + rc = ORTE_ERROR; + goto cleanup; + } + } else { + flags = (uint8_t*)boptr->bytes; + boptr->bytes = NULL; + boptr->size = 0; + } + if (NULL != boptr->bytes) { + free(boptr->bytes); + } + free(boptr); + /* cycle across the node pool and assign the values */ + for (n=0, m=0; n < orte_node_pool->size; n++) { + if (NULL != (nptr = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, n))) { + if (flags[m]) { + ORTE_FLAG_SET(nptr, ORTE_NODE_FLAG_SLOTS_GIVEN); + } else { + ORTE_FLAG_UNSET(nptr, ORTE_NODE_FLAG_SLOTS_GIVEN); + } + ++m; + } + } + } + + cleanup: + if (NULL != slots) { + free(slots); + } + if (NULL != flags) { + free(flags); + } + if (NULL != tps) { + free(tps); + } + if (NULL != topologies) { + free(topologies); + } + return rc; +} + + int orte_util_generate_ppn(orte_job_t *jdata, opal_buffer_t *buf) { diff --git a/orte/util/nidmap.h b/orte/util/nidmap.h index ac935f58cc..ab728176aa 100644 --- a/orte/util/nidmap.h +++ b/orte/util/nidmap.h @@ -29,11 +29,20 @@ #include "opal/dss/dss_types.h" #include "orte/runtime/orte_globals.h" +/* pass info about the nodes in an allocation */ ORTE_DECLSPEC int orte_util_nidmap_create(opal_pointer_array_t *pool, opal_buffer_t *buf); ORTE_DECLSPEC int orte_util_decode_nidmap(opal_buffer_t *buf); + +/* pass topology and #slots info */ +ORTE_DECLSPEC int orte_util_pass_node_info(opal_buffer_t *buf); + 
+ORTE_DECLSPEC int orte_util_parse_node_info(opal_buffer_t *buf); + + +/* pass info about node assignments for a specific job */ ORTE_DECLSPEC int orte_util_generate_ppn(orte_job_t *jdata, opal_buffer_t *buf); From 1ee6c185f777673667b3e2f3095f5d4f3d71c960 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Wed, 6 Feb 2019 19:28:22 -0800 Subject: [PATCH 4/8] Remove stale code Signed-off-by: Ralph Castain --- orte/util/nidmap.c | 114 ++++++--------------------------------------- 1 file changed, 14 insertions(+), 100 deletions(-) diff --git a/orte/util/nidmap.c b/orte/util/nidmap.c index 73dfe518f6..9cd94d2730 100644 --- a/orte/util/nidmap.c +++ b/orte/util/nidmap.c @@ -220,8 +220,8 @@ int orte_util_nidmap_create(opal_pointer_array_t *pool, int orte_util_decode_nidmap(opal_buffer_t *buf) { - uint8_t u8, *vp8 = NULL, *flags = NULL; - uint16_t *vp16 = NULL, *slots = NULL; + uint8_t u8, *vp8 = NULL; + uint16_t *vp16 = NULL; uint32_t *vp32 = NULL, vpid; int cnt, rc, nbytes, n; bool compressed; @@ -364,98 +364,6 @@ int orte_util_decode_nidmap(opal_buffer_t *buf) vp8 = NULL; } - - /* unpack compression flag for slots */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buf, &compressed, &cnt, OPAL_BOOL))) { - ORTE_ERROR_LOG(rc); - goto cleanup; - } - - /* if compressed, get the uncompressed size */ - if (compressed) { - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buf, &sz, &cnt, OPAL_SIZE))) { - ORTE_ERROR_LOG(rc); - goto cleanup; - } - } - - /* unpack the slots object */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buf, &boptr, &cnt, OPAL_BYTE_OBJECT))) { - ORTE_ERROR_LOG(rc); - goto cleanup; - } - - /* if compressed, decompress */ - if (compressed) { - if (!opal_compress.decompress_block((uint8_t**)&slots, sz, - boptr->bytes, boptr->size)) { - ORTE_ERROR_LOG(ORTE_ERROR); - if (NULL != boptr->bytes) { - free(boptr->bytes); - } - free(boptr); - rc = ORTE_ERROR; - goto cleanup; - } - } else { - slots = (uint16_t*)boptr->bytes; - boptr->bytes = NULL; - 
boptr->size = 0; - } - if (NULL != boptr->bytes) { - free(boptr->bytes); - } - free(boptr); - - - /* unpack compression flag for node flags */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buf, &compressed, &cnt, OPAL_BOOL))) { - ORTE_ERROR_LOG(rc); - goto cleanup; - } - - /* if compressed, get the uncompressed size */ - if (compressed) { - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buf, &sz, &cnt, OPAL_SIZE))) { - ORTE_ERROR_LOG(rc); - goto cleanup; - } - } - - /* unpack the node flags object */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buf, &boptr, &cnt, OPAL_BYTE_OBJECT))) { - ORTE_ERROR_LOG(rc); - goto cleanup; - } - - /* if compressed, decompress */ - if (compressed) { - if (!opal_compress.decompress_block((uint8_t**)&flags, sz, - boptr->bytes, boptr->size)) { - ORTE_ERROR_LOG(ORTE_ERROR); - if (NULL != boptr->bytes) { - free(boptr->bytes); - } - free(boptr); - rc = ORTE_ERROR; - goto cleanup; - } - } else { - flags = (uint8_t*)boptr->bytes; - boptr->bytes = NULL; - boptr->size = 0; - } - if (NULL != boptr->bytes) { - free(boptr->bytes); - } - free(boptr); - /* if we are the HNP, we don't need any of this stuff */ if (ORTE_PROC_IS_HNP) { goto cleanup; @@ -478,12 +386,6 @@ int orte_util_decode_nidmap(opal_buffer_t *buf) nd = OBJ_NEW(orte_node_t); nd->name = names[n]; opal_pointer_array_set_item(orte_node_pool, n, nd); - /* set the #slots */ - nd->slots = slots[n]; - /* set the flags */ - if (1 == flags[n]) { - ORTE_FLAG_SET(nd, ORTE_NODE_FLAG_SLOTS_GIVEN); - } /* set the topology - always default to homogeneous * as that is the most common scenario */ nd->topology = t; @@ -526,6 +428,18 @@ int orte_util_decode_nidmap(opal_buffer_t *buf) } cleanup: + if (NULL != vp8) { + free(vp8); + } + if (NULL != vp16) { + free(vp16); + } + if (NULL != vp32) { + free(vp32); + } + if (NULL != names) { + opal_argv_free(names); + } return rc; } From 78152aec8538a019eaba4adcb310c90cf3b83ed0 Mon Sep 17 00:00:00 2001 From: Gilles Gouaillardet Date: 
Thu, 7 Feb 2019 13:31:34 +0900 Subject: [PATCH 5/8] orte/nidmap: do not use compressed when uninitialized Signed-off-by: Gilles Gouaillardet --- orte/util/nidmap.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/orte/util/nidmap.c b/orte/util/nidmap.c index 9cd94d2730..f35916b2a9 100644 --- a/orte/util/nidmap.c +++ b/orte/util/nidmap.c @@ -1,7 +1,7 @@ /* * Copyright (c) 2016-2019 Intel, Inc. All rights reserved. - * Copyright (c) 2018 Research Organization for Information Science - * and Technology (RIST). All rights reserved. + * Copyright (c) 2018-2019 Research Organization for Information Science + * and Technology (RIST). All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -602,9 +602,9 @@ int orte_util_pass_node_info(opal_buffer_t *buffer) /* add the object */ boptr = &bo; rc = opal_dss.pack(buffer, &boptr, 1, OPAL_BYTE_OBJECT); - } - if (compressed) { - free(bo.bytes); + if (compressed) { + free(bo.bytes); + } } /* if we have uniform #slots, then just flag it - no @@ -646,9 +646,9 @@ int orte_util_pass_node_info(opal_buffer_t *buffer) /* add the object */ boptr = &bo; rc = opal_dss.pack(buffer, &boptr, 1, OPAL_BYTE_OBJECT); - } - if (compressed) { - free(bo.bytes); + if (compressed) { + free(bo.bytes); + } } /* if we have uniform flags, then just flag it - no @@ -694,9 +694,9 @@ int orte_util_pass_node_info(opal_buffer_t *buffer) /* add the object */ boptr = &bo; rc = opal_dss.pack(buffer, &boptr, 1, OPAL_BYTE_OBJECT); - } - if (compressed) { - free(bo.bytes); + if (compressed) { + free(bo.bytes); + } } cleanup: From b80210c36afc8ff6d908044870ef23c218c86092 Mon Sep 17 00:00:00 2001 From: Gilles Gouaillardet Date: Fri, 8 Feb 2019 10:21:42 +0900 Subject: [PATCH 6/8] orte/util: strdup() in orte_util_decode_nidmap() since opal_argv_free() will free() Signed-off-by: Gilles Gouaillardet --- orte/util/nidmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/orte/util/nidmap.c 
b/orte/util/nidmap.c index f35916b2a9..39feb9677b 100644 --- a/orte/util/nidmap.c +++ b/orte/util/nidmap.c @@ -384,7 +384,7 @@ int orte_util_decode_nidmap(opal_buffer_t *buf) for (n=0; NULL != names[n]; n++) { /* add this name to the pool */ nd = OBJ_NEW(orte_node_t); - nd->name = names[n]; + nd->name = strdup(names[n]); opal_pointer_array_set_item(orte_node_pool, n, nd); /* set the topology - always default to homogeneous * as that is the most common scenario */ From e56ee1e06a4ee4eec6d5076c4cc2449568fad9d6 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Fri, 8 Feb 2019 08:41:26 -0800 Subject: [PATCH 7/8] Remove the remaining cruft from dual oob transport * When we moved to allowing dual rml/oob transports, we added a bunch of stuff that is no longer needed. Remove it so as to simplify the messaging system. * Fix the routed/radix component so it correctly returns the parent's vpid Signed-off-by: Ralph Castain --- .../errmgr/default_hnp/errmgr_default_hnp.c | 19 +- .../default_orted/errmgr_default_orted.c | 25 +- orte/mca/ess/base/ess_base_std_orted.c | 29 +- orte/mca/ess/base/ess_base_std_tool.c | 18 +- orte/mca/ess/hnp/ess_hnp_module.c | 26 -- orte/mca/filem/base/filem_base_receive.c | 8 +- orte/mca/filem/raw/filem_raw_module.c | 5 +- orte/mca/grpcomm/base/grpcomm_base_stubs.c | 47 +-- orte/mca/grpcomm/direct/grpcomm_direct.c | 20 +- orte/mca/iof/hnp/iof_hnp_send.c | 7 +- orte/mca/iof/iof.h | 5 +- orte/mca/iof/orted/iof_orted.c | 5 +- orte/mca/iof/orted/iof_orted_read.c | 7 +- orte/mca/iof/orted/iof_orted_receive.c | 7 +- orte/mca/iof/tool/iof_tool.c | 12 +- orte/mca/oob/base/base.h | 7 +- orte/mca/oob/base/oob_base_stubs.c | 30 +- orte/mca/oob/oob.h | 6 +- orte/mca/oob/tcp/oob_tcp.c | 4 +- orte/mca/oob/tcp/oob_tcp_component.c | 46 +-- orte/mca/oob/tcp/oob_tcp_connection.c | 12 +- orte/mca/oob/tcp/oob_tcp_peer.h | 10 +- orte/mca/oob/tcp/oob_tcp_sendrecv.c | 3 +- orte/mca/oob/tcp/oob_tcp_sendrecv.h | 10 +- orte/mca/plm/base/plm_base_launch_support.c | 10 +- 
orte/mca/plm/base/plm_base_receive.c | 5 +- orte/mca/plm/rsh/plm_rsh_module.c | 20 +- orte/mca/rml/base/Makefile.am | 5 +- orte/mca/rml/base/base.h | 54 +-- orte/mca/rml/base/rml_base_frame.c | 166 +-------- orte/mca/rml/base/rml_base_msg_handlers.c | 3 +- orte/mca/rml/base/rml_base_stubs.c | 333 ------------------ orte/mca/rml/oob/rml_oob.h | 12 +- orte/mca/rml/oob/rml_oob_component.c | 284 +++++---------- orte/mca/rml/oob/rml_oob_send.c | 10 +- orte/mca/rml/rml.h | 296 +++------------- orte/mca/rml/rml_types.h | 15 +- orte/mca/routed/base/base.h | 29 +- orte/mca/routed/base/routed_base_fns.c | 215 +---------- orte/mca/routed/base/routed_base_frame.c | 107 ++---- orte/mca/routed/radix/routed_radix.c | 29 +- orte/mca/routed/routed.h | 43 +-- orte/mca/snapc/base/snapc_base_fns.c | 5 +- orte/mca/state/base/state_base_fns.c | 21 +- orte/mca/state/hnp/state_hnp.c | 5 +- orte/mca/state/orted/state_orted.c | 15 +- orte/orted/orted_comm.c | 49 +-- orte/orted/orted_main.c | 21 +- orte/orted/orted_submit.c | 18 +- orte/orted/pmix/pmix_server.c | 8 +- orte/orted/pmix/pmix_server_dyn.c | 5 +- orte/orted/pmix/pmix_server_fence.c | 5 +- orte/orted/pmix/pmix_server_gen.c | 5 +- orte/orted/pmix/pmix_server_pub.c | 9 +- orte/runtime/orte_data_server.c | 8 +- orte/runtime/orte_globals.c | 4 - orte/runtime/orte_globals.h | 5 - orte/runtime/orte_mca_params.c | 13 - orte/util/comm/comm.c | 25 +- orte/util/hnp_contact.c | 4 +- orte/util/nidmap.c | 33 +- orte/util/show_help.c | 8 +- 62 files changed, 411 insertions(+), 1859 deletions(-) delete mode 100644 orte/mca/rml/base/rml_base_stubs.c diff --git a/orte/mca/errmgr/default_hnp/errmgr_default_hnp.c b/orte/mca/errmgr/default_hnp/errmgr_default_hnp.c index c748d28067..8bf485293c 100644 --- a/orte/mca/errmgr/default_hnp/errmgr_default_hnp.c +++ b/orte/mca/errmgr/default_hnp/errmgr_default_hnp.c @@ -9,7 +9,7 @@ * Copyright (c) 2011 Oracle and/or all its affiliates. All rights reserved. 
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. * Copyright (c) 2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. @@ -277,8 +277,7 @@ static void job_errors(int fd, short args, void *cbdata) ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_JOBID_PRINT(jdata->jobid), ORTE_NAME_PRINT(&jdata->originator))); - if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit, - &jdata->originator, answer, + if (0 > (ret = orte_rml.send_buffer_nb(&jdata->originator, answer, ORTE_RML_TAG_LAUNCH_RESP, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(ret); @@ -358,7 +357,6 @@ static void proc_errors(int fd, short args, void *cbdata) orte_proc_state_t state = caddy->proc_state; int i; int32_t i32, *i32ptr; - char *rtmod; ORTE_ACQUIRE_OBJECT(caddy); @@ -381,7 +379,6 @@ static void proc_errors(int fd, short args, void *cbdata) goto cleanup; } pptr = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, proc->vpid); - rtmod = orte_rml.get_routed(orte_mgmt_conduit); /* we MUST handle a communication failure before doing anything else * as it requires some special care to avoid normal termination issues @@ -412,9 +409,9 @@ static void proc_errors(int fd, short args, void *cbdata) "%s Comm failure: daemons terminating - recording daemon %s as gone", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(proc))); /* remove from dependent routes, if it is one */ - orte_routed.route_lost(rtmod, proc); + orte_routed.route_lost(proc); /* if all my routes and local children are gone, then terminate ourselves */ - if (0 == orte_routed.num_routes(rtmod)) { + if (0 == orte_routed.num_routes()) { for (i=0; i < orte_local_children->size; i++) { if (NULL != (proct = (orte_proc_t*)opal_pointer_array_get_item(orte_local_children, i)) && 
ORTE_FLAG_TEST(pptr, ORTE_PROC_FLAG_ALIVE) && proct->state < ORTE_PROC_STATE_UNTERMINATED) { @@ -435,7 +432,7 @@ static void proc_errors(int fd, short args, void *cbdata) OPAL_OUTPUT_VERBOSE((5, orte_errmgr_base_framework.framework_output, "%s Comm failure: %d routes remain alive", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - (int)orte_routed.num_routes(rtmod))); + (int)orte_routed.num_routes())); } goto cleanup; } @@ -493,7 +490,7 @@ static void proc_errors(int fd, short args, void *cbdata) } /* if all my routes and children are gone, then terminate ourselves nicely (i.e., this is a normal termination) */ - if (0 == orte_routed.num_routes(rtmod)) { + if (0 == orte_routed.num_routes()) { OPAL_OUTPUT_VERBOSE((2, orte_errmgr_base_framework.framework_output, "%s errmgr:default:hnp all routes gone - exiting", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); @@ -718,7 +715,7 @@ static void proc_errors(int fd, short args, void *cbdata) default_hnp_abort(jdata); } /* remove from dependent routes, if it is one */ - orte_routed.route_lost(rtmod, proc); + orte_routed.route_lost(proc); break; case ORTE_PROC_STATE_UNABLE_TO_SEND_MSG: @@ -841,7 +838,7 @@ static void default_hnp_abort(orte_job_t *jdata) i32ptr = &i32; if (orte_get_attribute(&jdata->attributes, ORTE_JOB_NUM_NONZERO_EXIT, (void**)&i32ptr, OPAL_INT32)) { /* warn user */ - orte_show_help("help-errmgr-base.txt", "normal-termination-but", true, + orte_show_help("help-errmgr-base.txt", "normal-termination-but", true, (1 == ORTE_LOCAL_JOBID(jdata->jobid)) ? "Primary" : "Child", (1 == ORTE_LOCAL_JOBID(jdata->jobid)) ? "" : ORTE_LOCAL_JOBID_PRINT(jdata->jobid), i32, (1 == i32) ? 
"process returned\na non-zero exit code" : diff --git a/orte/mca/errmgr/default_orted/errmgr_default_orted.c b/orte/mca/errmgr/default_orted/errmgr_default_orted.c index 5fe4ca1793..ff0fe38fdc 100644 --- a/orte/mca/errmgr/default_orted/errmgr_default_orted.c +++ b/orte/mca/errmgr/default_orted/errmgr_default_orted.c @@ -204,8 +204,7 @@ static void orted_abort(int error_code, char *fmt, ...) } /* send it */ - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - ORTE_PROC_MY_HNP, alert, + if (0 > (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, alert, ORTE_RML_TAG_PLM, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(rc); @@ -303,8 +302,7 @@ static void job_errors(int fd, short args, void *cbdata) goto cleanup; } /* send it */ - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - ORTE_PROC_MY_HNP, alert, + if (0 > (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, alert, ORTE_RML_TAG_PLM, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(rc); @@ -321,7 +319,6 @@ static void proc_errors(int fd, short args, void *cbdata) orte_job_t *jdata; orte_process_name_t *proc = &caddy->name; orte_proc_state_t state = caddy->proc_state; - char *rtmod; orte_proc_t *child, *ptr; opal_buffer_t *alert; orte_plm_cmd_flag_t cmd; @@ -386,9 +383,6 @@ static void proc_errors(int fd, short args, void *cbdata) goto cleanup; } - /* get our management conduit's routed module name */ - rtmod = orte_rml.get_routed(orte_mgmt_conduit); - if (ORTE_PROC_STATE_COMM_FAILED == state) { /* if it is our own connection, ignore it */ if (OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, ORTE_PROC_MY_NAME, proc)) { @@ -444,7 +438,7 @@ static void proc_errors(int fd, short args, void *cbdata) } /* if all my routes and children are gone, then terminate ourselves nicely (i.e., this is a normal termination) */ - if (0 == orte_routed.num_routes(rtmod)) { + if (0 == orte_routed.num_routes()) { OPAL_OUTPUT_VERBOSE((2, orte_errmgr_base_framework.framework_output, "%s errmgr:default:orted all 
routes gone - exiting", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); @@ -453,7 +447,7 @@ static void proc_errors(int fd, short args, void *cbdata) OPAL_OUTPUT_VERBOSE((2, orte_errmgr_base_framework.framework_output, "%s errmgr:default:orted not exiting, num_routes() == %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - (int)orte_routed.num_routes(rtmod))); + (int)orte_routed.num_routes())); } } /* if not, then we can continue */ @@ -513,8 +507,7 @@ static void proc_errors(int fd, short args, void *cbdata) ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&child->name), jdata->num_local_procs)); - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - ORTE_PROC_MY_HNP, alert, + if (0 > (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, alert, ORTE_RML_TAG_PLM, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(rc); @@ -579,7 +572,7 @@ static void proc_errors(int fd, short args, void *cbdata) } /* if all my routes and children are gone, then terminate ourselves nicely (i.e., this is a normal termination) */ - if (0 == orte_routed.num_routes(rtmod)) { + if (0 == orte_routed.num_routes()) { OPAL_OUTPUT_VERBOSE((2, orte_errmgr_base_framework.framework_output, "%s errmgr:default:orted all routes gone - exiting", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); @@ -621,8 +614,7 @@ static void proc_errors(int fd, short args, void *cbdata) ORTE_NAME_PRINT(&child->name), jdata->num_local_procs)); /* send it */ - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - ORTE_PROC_MY_HNP, alert, + if (0 > (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, alert, ORTE_RML_TAG_PLM, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(rc); @@ -677,8 +669,7 @@ static void proc_errors(int fd, short args, void *cbdata) OBJ_RELEASE(jdata); /* send it */ - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - ORTE_PROC_MY_HNP, alert, + if (0 > (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, alert, ORTE_RML_TAG_PLM, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(rc); diff --git 
a/orte/mca/ess/base/ess_base_std_orted.c b/orte/mca/ess/base/ess_base_std_orted.c index 365fc871e5..d50c9bfd45 100644 --- a/orte/mca/ess/base/ess_base_std_orted.c +++ b/orte/mca/ess/base/ess_base_std_orted.c @@ -109,7 +109,6 @@ int orte_ess_base_orted_setup(void) hwloc_obj_t obj; unsigned i, j; orte_topology_t *t; - opal_list_t transports; orte_ess_base_signal_t *sig; int idx; @@ -448,27 +447,6 @@ int orte_ess_base_orted_setup(void) goto error; } - /* get a conduit for our use - we never route IO over fabric */ - OBJ_CONSTRUCT(&transports, opal_list_t); - orte_set_attribute(&transports, ORTE_RML_TRANSPORT_TYPE, - ORTE_ATTR_LOCAL, orte_mgmt_transport, OPAL_STRING); - if (ORTE_RML_CONDUIT_INVALID == (orte_mgmt_conduit = orte_rml.open_conduit(&transports))) { - ret = ORTE_ERR_OPEN_CONDUIT_FAIL; - error = "orte_rml_open_mgmt_conduit"; - goto error; - } - OPAL_LIST_DESTRUCT(&transports); - - OBJ_CONSTRUCT(&transports, opal_list_t); - orte_set_attribute(&transports, ORTE_RML_TRANSPORT_TYPE, - ORTE_ATTR_LOCAL, orte_coll_transport, OPAL_STRING); - if (ORTE_RML_CONDUIT_INVALID == (orte_coll_conduit = orte_rml.open_conduit(&transports))) { - ret = ORTE_ERR_OPEN_CONDUIT_FAIL; - error = "orte_rml_open_coll_conduit"; - goto error; - } - OPAL_LIST_DESTRUCT(&transports); - /* * Group communications */ @@ -609,10 +587,6 @@ int orte_ess_base_orted_finalize(void) pmix_server_finalize(); (void) mca_base_framework_close(&opal_pmix_base_framework); - /* release the conduits */ - orte_rml.close_conduit(orte_mgmt_conduit); - orte_rml.close_conduit(orte_coll_conduit); - /* close frameworks */ (void) mca_base_framework_close(&orte_filem_base_framework); (void) mca_base_framework_close(&orte_grpcomm_base_framework); @@ -695,8 +669,7 @@ static void signal_forward_callback(int fd, short event, void *arg) } /* send it to ourselves */ - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - ORTE_PROC_MY_NAME, cmd, + if (0 > (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_NAME, cmd, 
ORTE_RML_TAG_DAEMON, NULL, NULL))) { ORTE_ERROR_LOG(rc); diff --git a/orte/mca/ess/base/ess_base_std_tool.c b/orte/mca/ess/base/ess_base_std_tool.c index f3ca7baa3f..9f76890d23 100644 --- a/orte/mca/ess/base/ess_base_std_tool.c +++ b/orte/mca/ess/base/ess_base_std_tool.c @@ -11,7 +11,7 @@ * All rights reserved. * Copyright (c) 2011-2013 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2014 Hochschule Esslingen. All rights reserved. * * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. @@ -94,7 +94,6 @@ int orte_ess_base_tool_setup(opal_list_t *flags) { int ret; char *error = NULL; - opal_list_t transports; opal_list_t info; opal_value_t *kv, *knext, val; opal_pmix_query_t *q; @@ -222,13 +221,6 @@ int orte_ess_base_tool_setup(opal_list_t *flags) goto error; } - /* get a conduit for our use - we never route IO over fabric */ - OBJ_CONSTRUCT(&transports, opal_list_t); - orte_set_attribute(&transports, ORTE_RML_TRANSPORT_TYPE, - ORTE_ATTR_LOCAL, orte_mgmt_transport, OPAL_STRING); - orte_mgmt_conduit = orte_rml.open_conduit(&transports); - OPAL_LIST_DESTRUCT(&transports); - /* we -may- need to know the name of the head * of our session directory tree, particularly the * tmp base where any other session directories on @@ -269,7 +261,7 @@ int orte_ess_base_tool_setup(opal_list_t *flags) val.data.string = NULL; OBJ_DESTRUCT(&val); /* set the route to be direct */ - if (ORTE_SUCCESS != orte_routed.update_route(NULL, ORTE_PROC_MY_HNP, ORTE_PROC_MY_HNP)) { + if (ORTE_SUCCESS != orte_routed.update_route(ORTE_PROC_MY_HNP, ORTE_PROC_MY_HNP)) { orte_show_help("help-orte-top.txt", "orte-top:hnp-uri-bad", true, orte_process_info.my_hnp_uri); orte_finalize(); exit(1); @@ -277,7 +269,7 @@ int orte_ess_base_tool_setup(opal_list_t *flags) /* connect to the HNP so we can recv forwarded output */ buf = OBJ_NEW(opal_buffer_t); - ret = 
orte_rml.send_buffer_nb(orte_mgmt_conduit, ORTE_PROC_MY_HNP, + ret = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, buf, ORTE_RML_TAG_WARMUP_CONNECTION, orte_rml_send_callback, NULL); if (ORTE_SUCCESS != ret) { @@ -287,7 +279,7 @@ int orte_ess_base_tool_setup(opal_list_t *flags) } /* set the target hnp as our lifeline so we will terminate if it exits */ - orte_routed.set_lifeline(NULL, ORTE_PROC_MY_HNP); + orte_routed.set_lifeline(ORTE_PROC_MY_HNP); /* setup the IOF */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_iof_base_framework, 0))) { @@ -317,8 +309,6 @@ int orte_ess_base_tool_finalize(void) { orte_wait_finalize(); - orte_rml.close_conduit(orte_mgmt_conduit); - /* if I am a tool, then all I will have done is * a very small subset of orte_init - ensure that * I only back those elements out diff --git a/orte/mca/ess/hnp/ess_hnp_module.c b/orte/mca/ess/hnp/ess_hnp_module.c index 6e56d69ff5..3706e07596 100644 --- a/orte/mca/ess/hnp/ess_hnp_module.c +++ b/orte/mca/ess/hnp/ess_hnp_module.c @@ -141,7 +141,6 @@ static int rte_init(void) uint32_t h; int idx; orte_topology_t *t; - opal_list_t transports; orte_ess_base_signal_t *sig; opal_value_t val; @@ -370,27 +369,6 @@ static int rte_init(void) goto error; } - /* get a conduit for our use - we never route IO over fabric */ - OBJ_CONSTRUCT(&transports, opal_list_t); - orte_set_attribute(&transports, ORTE_RML_TRANSPORT_TYPE, - ORTE_ATTR_LOCAL, orte_mgmt_transport, OPAL_STRING); - if (ORTE_RML_CONDUIT_INVALID == (orte_mgmt_conduit = orte_rml.open_conduit(&transports))) { - ret = ORTE_ERR_OPEN_CONDUIT_FAIL; - error = "orte_rml_open_mgmt_conduit"; - goto error; - } - OPAL_LIST_DESTRUCT(&transports); - - OBJ_CONSTRUCT(&transports, opal_list_t); - orte_set_attribute(&transports, ORTE_RML_TRANSPORT_TYPE, - ORTE_ATTR_LOCAL, orte_coll_transport, OPAL_STRING); - if (ORTE_RML_CONDUIT_INVALID == (orte_coll_conduit = orte_rml.open_conduit(&transports))) { - ret = ORTE_ERR_OPEN_CONDUIT_FAIL; - error = 
"orte_rml_open_coll_conduit"; - goto error; - } - OPAL_LIST_DESTRUCT(&transports); - /* it is now safe to start the pmix server */ pmix_server_start(); @@ -776,10 +754,6 @@ static int rte_finalize(void) fflush(stdout); fflush(stderr); - /* release the conduits */ - orte_rml.close_conduit(orte_mgmt_conduit); - orte_rml.close_conduit(orte_coll_conduit); - (void) mca_base_framework_close(&orte_iof_base_framework); (void) mca_base_framework_close(&orte_rtc_base_framework); (void) mca_base_framework_close(&orte_odls_base_framework); diff --git a/orte/mca/filem/base/filem_base_receive.c b/orte/mca/filem/base/filem_base_receive.c index 30c958d3e2..6e9a6e7b82 100644 --- a/orte/mca/filem/base/filem_base_receive.c +++ b/orte/mca/filem/base/filem_base_receive.c @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2016-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2019 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -208,8 +208,7 @@ static void filem_base_process_get_proc_node_name_cmd(orte_process_name_t* sende return; } - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - sender, answer, + if (0 > (rc = orte_rml.send_buffer_nb(sender, answer, ORTE_RML_TAG_FILEM_BASE_RESP, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(rc); @@ -301,8 +300,7 @@ static void filem_base_process_get_remote_path_cmd(orte_process_name_t* sender, goto CLEANUP; } - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - sender, answer, + if (0 > (rc = orte_rml.send_buffer_nb(sender, answer, ORTE_RML_TAG_FILEM_BASE_RESP, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(rc); diff --git a/orte/mca/filem/raw/filem_raw_module.c b/orte/mca/filem/raw/filem_raw_module.c index 14359217d9..e499c3bc61 100644 --- a/orte/mca/filem/raw/filem_raw_module.c +++ b/orte/mca/filem/raw/filem_raw_module.c @@ -2,7 +2,7 @@ * Copyright (c) 2012-2013 Los Alamos National Security, LLC. * All rights reserved * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -870,8 +870,7 @@ static void send_complete(char *file, int status) OBJ_RELEASE(buf); return; } - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - ORTE_PROC_MY_HNP, buf, + if (0 > (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, buf, ORTE_RML_TAG_FILEM_BASE_RESP, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(rc); diff --git a/orte/mca/grpcomm/base/grpcomm_base_stubs.c b/orte/mca/grpcomm/base/grpcomm_base_stubs.c index a27e8603e9..d410a399fd 100644 --- a/orte/mca/grpcomm/base/grpcomm_base_stubs.c +++ b/orte/mca/grpcomm/base/grpcomm_base_stubs.c @@ -231,7 +231,6 @@ orte_grpcomm_coll_t* orte_grpcomm_base_get_tracker(orte_grpcomm_signature_t *sig orte_namelist_t *nm; opal_list_t children; size_t n; - char *routed; /* search the existing tracker list to see if this already exists */ OPAL_LIST_FOREACH(coll, &orte_grpcomm_base.ongoing, orte_grpcomm_coll_t) { @@ -279,38 +278,30 @@ orte_grpcomm_coll_t* orte_grpcomm_base_get_tracker(orte_grpcomm_signature_t *sig return NULL; } - /* get the routed module for our conduit */ - routed = orte_rml.get_routed(orte_coll_conduit); - if (NULL == routed) { - /* this conduit is not routed, so we expect all daemons - * to directly participate */ - coll->nexpected = coll->ndmns; - } else { - /* cycle thru the array of daemons and compare them to our - * children in the routing tree, counting the ones that match - * so we know how many daemons we should receive contributions from */ - OBJ_CONSTRUCT(&children, opal_list_t); - orte_routed.get_routing_list(routed, &children); - while (NULL != (nm = (orte_namelist_t*)opal_list_remove_first(&children))) { - for (n=0; n < coll->ndmns; n++) { - if (nm->name.vpid == coll->dmns[n]) { - coll->nexpected++; - break; - } - } - OBJ_RELEASE(nm); - } - OPAL_LIST_DESTRUCT(&children); - - /* see if I am in the array of participants - note that I may - * be in the rollup tree even though I'm not participating - * in the collective itself */ + /* cycle thru the array of daemons and 
compare them to our + * children in the routing tree, counting the ones that match + * so we know how many daemons we should receive contributions from */ + OBJ_CONSTRUCT(&children, opal_list_t); + orte_routed.get_routing_list(&children); + while (NULL != (nm = (orte_namelist_t*)opal_list_remove_first(&children))) { for (n=0; n < coll->ndmns; n++) { - if (coll->dmns[n] == ORTE_PROC_MY_NAME->vpid) { + if (nm->name.vpid == coll->dmns[n]) { coll->nexpected++; break; } } + OBJ_RELEASE(nm); + } + OPAL_LIST_DESTRUCT(&children); + + /* see if I am in the array of participants - note that I may + * be in the rollup tree even though I'm not participating + * in the collective itself */ + for (n=0; n < coll->ndmns; n++) { + if (coll->dmns[n] == ORTE_PROC_MY_NAME->vpid) { + coll->nexpected++; + break; + } } return coll; diff --git a/orte/mca/grpcomm/direct/grpcomm_direct.c b/orte/mca/grpcomm/direct/grpcomm_direct.c index ce95319dbc..35779b2ed0 100644 --- a/orte/mca/grpcomm/direct/grpcomm_direct.c +++ b/orte/mca/grpcomm/direct/grpcomm_direct.c @@ -112,8 +112,7 @@ static int xcast(orte_vpid_t *vpids, /* send it to the HNP (could be myself) for relay */ OBJ_RETAIN(buf); // we'll let the RML release it - if (0 > (rc = orte_rml.send_buffer_nb(orte_coll_conduit, - ORTE_PROC_MY_HNP, buf, ORTE_RML_TAG_XCAST, + if (0 > (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, buf, ORTE_RML_TAG_XCAST, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(rc); OBJ_RELEASE(buf); @@ -153,8 +152,7 @@ static int allgather(orte_grpcomm_coll_t *coll, ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); /* send the info to ourselves for tracking */ - rc = orte_rml.send_buffer_nb(orte_coll_conduit, - ORTE_PROC_MY_NAME, relay, + rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_NAME, relay, ORTE_RML_TAG_ALLGATHER_DIRECT, orte_rml_send_callback, NULL); return rc; @@ -245,8 +243,7 @@ static void allgather_recv(int status, orte_process_name_t* sender, /* transfer the collected bucket */ opal_dss.copy_payload(reply, &coll->bucket); /* 
send the info to our parent */ - rc = orte_rml.send_buffer_nb(orte_coll_conduit, - ORTE_PROC_MY_PARENT, reply, + rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_PARENT, reply, ORTE_RML_TAG_ALLGATHER_DIRECT, orte_rml_send_callback, NULL); } @@ -271,7 +268,6 @@ static void xcast_recv(int status, orte_process_name_t* sender, opal_list_t coll; orte_grpcomm_signature_t *sig; orte_rml_tag_t tag; - char *rtmod; size_t inlen, cmplen; uint8_t *packed_data, *cmpdata; int32_t nvals, i; @@ -372,9 +368,6 @@ static void xcast_recv(int status, orte_process_name_t* sender, return; } - /* get our conduit's routed module name */ - rtmod = orte_rml.get_routed(orte_coll_conduit); - /* if this is headed for the daemon command processor, * then we first need to check for add_local_procs * as that command includes some needed wireup info */ @@ -424,7 +417,7 @@ static void xcast_recv(int status, orte_process_name_t* sender, /* update the routing plan - the HNP already did * it when it computed the VM, so don't waste time * re-doing it here */ - orte_routed.update_routing_plan(rtmod); + orte_routed.update_routing_plan(); } /* routing is now possible */ orte_routed_base.routing_enabled = true; @@ -523,7 +516,7 @@ static void xcast_recv(int status, orte_process_name_t* sender, relay: if (!orte_do_not_launch) { /* get the list of next recipients from the routed module */ - orte_routed.get_routing_list(rtmod, &coll); + orte_routed.get_routing_list(&coll); /* if list is empty, no relay is required */ if (opal_list_is_empty(&coll)) { @@ -569,8 +562,7 @@ static void xcast_recv(int status, orte_process_name_t* sender, ORTE_FORCED_TERMINATE(ORTE_ERR_UNREACH); continue; } - if (ORTE_SUCCESS != (ret = orte_rml.send_buffer_nb(orte_coll_conduit, - &nm->name, rly, ORTE_RML_TAG_XCAST, + if (ORTE_SUCCESS != (ret = orte_rml.send_buffer_nb(&nm->name, rly, ORTE_RML_TAG_XCAST, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(ret); OBJ_RELEASE(rly); diff --git a/orte/mca/iof/hnp/iof_hnp_send.c 
b/orte/mca/iof/hnp/iof_hnp_send.c index 89f9ff8761..9e2f202ea5 100644 --- a/orte/mca/iof/hnp/iof_hnp_send.c +++ b/orte/mca/iof/hnp/iof_hnp_send.c @@ -12,7 +12,7 @@ * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012 Los Alamos National Security, LLC * All rights reserved - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -109,9 +109,8 @@ int orte_iof_hnp_send_data_to_endpoint(orte_process_name_t *host, /* send the buffer to the host - this is either a daemon or * a tool that requested IOF */ - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - host, buf, ORTE_RML_TAG_IOF_PROXY, - orte_rml_send_callback, NULL))) { + if (0 > (rc = orte_rml.send_buffer_nb(host, buf, ORTE_RML_TAG_IOF_PROXY, + orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(rc); return rc; } diff --git a/orte/mca/iof/iof.h b/orte/mca/iof/iof.h index 742eab42ca..30292863ed 100644 --- a/orte/mca/iof/iof.h +++ b/orte/mca/iof/iof.h @@ -13,7 +13,7 @@ * Copyright (c) 2007-2008 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -149,8 +149,7 @@ BEGIN_C_DECLS opal_dss.pack(buf, (b), 1, ORTE_NAME); \ \ /* send the buffer to the HNP */ \ - orte_rml.send_buffer_nb(orte_mgmt_conduit, \ - ORTE_PROC_MY_HNP, buf, \ + orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, buf, \ ORTE_RML_TAG_IOF_HNP, \ orte_rml_send_callback, NULL); \ } while(0); diff --git a/orte/mca/iof/orted/iof_orted.c b/orte/mca/iof/orted/iof_orted.c index e06c3ce9ed..a3b84bcde2 100644 --- a/orte/mca/iof/orted/iof_orted.c +++ b/orte/mca/iof/orted/iof_orted.c @@ -12,7 +12,7 @@ * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. 
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2019 Intel, Inc. All rights reserved. * Copyright (c) 2017 Mellanox Technologies. All rights reserved. * Copyright (c) 2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. @@ -513,8 +513,7 @@ static int orted_output(const orte_process_name_t* peer, "%s iof:orted:output sending %d bytes to HNP", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (int)strlen(msg)+1)); - orte_rml.send_buffer_nb(orte_mgmt_conduit, - ORTE_PROC_MY_HNP, buf, ORTE_RML_TAG_IOF_HNP, + orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, buf, ORTE_RML_TAG_IOF_HNP, orte_rml_send_callback, NULL); return ORTE_SUCCESS; diff --git a/orte/mca/iof/orted/iof_orted_read.c b/orte/mca/iof/orted/iof_orted_read.c index d1e07898bb..c1f1e3a646 100644 --- a/orte/mca/iof/orted/iof_orted_read.c +++ b/orte/mca/iof/orted/iof_orted_read.c @@ -12,7 +12,7 @@ * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2019 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -140,9 +140,8 @@ void orte_iof_orted_read_handler(int fd, short event, void *cbdata) "%s iof:orted:read handler sending %d bytes to HNP", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes)); - orte_rml.send_buffer_nb(orte_mgmt_conduit, - ORTE_PROC_MY_HNP, buf, ORTE_RML_TAG_IOF_HNP, - orte_rml_send_callback, NULL); + orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, buf, ORTE_RML_TAG_IOF_HNP, + orte_rml_send_callback, NULL); /* re-add the event */ ORTE_IOF_READ_ACTIVATE(rev); diff --git a/orte/mca/iof/orted/iof_orted_receive.c b/orte/mca/iof/orted/iof_orted_receive.c index 9fae3499fc..c49c437be8 100644 --- a/orte/mca/iof/orted/iof_orted_receive.c +++ b/orte/mca/iof/orted/iof_orted_receive.c @@ -12,7 +12,7 @@ * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014-2016 Intel Corporation. All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -72,9 +72,8 @@ void orte_iof_orted_send_xonxoff(orte_iof_tag_t tag) (ORTE_IOF_XON == tag) ? "xon" : "xoff")); /* send the buffer to the HNP */ - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - ORTE_PROC_MY_HNP, buf, ORTE_RML_TAG_IOF_HNP, - send_cb, NULL))) { + if (0 > (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, buf, ORTE_RML_TAG_IOF_HNP, + send_cb, NULL))) { ORTE_ERROR_LOG(rc); } } diff --git a/orte/mca/iof/tool/iof_tool.c b/orte/mca/iof/tool/iof_tool.c index 9ec085bc18..a9ebb86a80 100644 --- a/orte/mca/iof/tool/iof_tool.c +++ b/orte/mca/iof/tool/iof_tool.c @@ -12,7 +12,7 @@ * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -170,9 +170,8 @@ static int tool_pull(const orte_process_name_t* src_name, /* send the buffer to the correct HNP */ ORTE_HNP_NAME_FROM_JOB(&hnp, src_name->jobid); - orte_rml.send_buffer_nb(orte_mgmt_conduit, - &hnp, buf, ORTE_RML_TAG_IOF_HNP, - send_cb, NULL); + orte_rml.send_buffer_nb(&hnp, buf, ORTE_RML_TAG_IOF_HNP, + send_cb, NULL); return ORTE_SUCCESS; } @@ -220,9 +219,8 @@ static int tool_close(const orte_process_name_t* src_name, /* send the buffer to the correct HNP */ ORTE_HNP_NAME_FROM_JOB(&hnp, src_name->jobid); - orte_rml.send_buffer_nb(orte_mgmt_conduit, - &hnp, buf, ORTE_RML_TAG_IOF_HNP, - send_cb, NULL); + orte_rml.send_buffer_nb(&hnp, buf, ORTE_RML_TAG_IOF_HNP, + send_cb, NULL); return ORTE_SUCCESS; } diff --git a/orte/mca/oob/base/base.h b/orte/mca/oob/base/base.h index 830a05ad33..fb4ed1c0be 100644 --- a/orte/mca/oob/base/base.h +++ b/orte/mca/oob/base/base.h @@ -11,7 +11,7 @@ * All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2017-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2017-2019 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -144,11 +144,6 @@ ORTE_DECLSPEC void orte_oob_base_send_nb(int fd, short args, void *cbdata); */ ORTE_DECLSPEC void orte_oob_base_get_addr(char **uri); -/* Get the available transports and their attributes */ -#define ORTE_OOB_GET_TRANSPORTS(u) orte_oob_base_get_transports(u) -ORTE_DECLSPEC void orte_oob_base_get_transports(opal_list_t *transports); - - #if OPAL_ENABLE_FT_CR == 1 ORTE_DECLSPEC void orte_oob_base_ft_event(int fd, short args, void *cbdata); #endif diff --git a/orte/mca/oob/base/oob_base_stubs.c b/orte/mca/oob/base/oob_base_stubs.c index 03da3c815e..7ead5e847e 100644 --- a/orte/mca/oob/base/oob_base_stubs.c +++ b/orte/mca/oob/base/oob_base_stubs.c @@ -2,7 +2,7 @@ /* * Copyright (c) 2012-2014 Los Alamos National Security, LLC. 
All rights * reserved. - * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -107,7 +107,7 @@ void orte_oob_base_send_nb(int fd, short args, void *cbdata) OPAL_LIST_FOREACH(cli, &orte_oob_base.actives, mca_base_component_list_item_t) { component = (mca_oob_base_component_t*)cli->cli_component; if (NULL != component->is_reachable) { - if (component->is_reachable(msg->routed, &msg->dst)) { + if (component->is_reachable(&msg->dst)) { /* there is a way to reach this peer - record it * so we don't waste this time again */ @@ -170,7 +170,7 @@ void orte_oob_base_send_nb(int fd, short args, void *cbdata) OPAL_LIST_FOREACH(cli, &orte_oob_base.actives, mca_base_component_list_item_t) { component = (mca_oob_base_component_t*)cli->cli_component; /* is this peer reachable via this component? */ - if (!component->is_reachable(msg->routed, &msg->dst)) { + if (!component->is_reachable(&msg->dst)) { continue; } /* it is addressable, so attempt to send via that transport */ @@ -384,30 +384,6 @@ static void process_uri(char *uri) opal_argv_free(uris); } -void orte_oob_base_get_transports(opal_list_t *transports) -{ - mca_base_component_list_item_t *cli; - mca_oob_base_component_t *component; - orte_rml_pathway_t *p; - - opal_output_verbose(5, orte_oob_base_framework.framework_output, - "%s: get transports", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - - OPAL_LIST_FOREACH(cli, &orte_oob_base.actives, mca_base_component_list_item_t) { - component = (mca_oob_base_component_t*)cli->cli_component; - opal_output_verbose(5, orte_oob_base_framework.framework_output, - "%s:get transports for component %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - component->oob_base.mca_component_name); - if (NULL != component->query_transports) { - if (NULL != (p = component->query_transports())) { - opal_list_append(transports, &p->super); - } - } - } -} - #if OPAL_ENABLE_FT_CR == 1 void 
orte_oob_base_ft_event(int sd, short argc, void *cbdata) { diff --git a/orte/mca/oob/oob.h b/orte/mca/oob/oob.h index ebb0540ea9..15650d2b39 100644 --- a/orte/mca/oob/oob.h +++ b/orte/mca/oob/oob.h @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2019 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -55,9 +55,8 @@ typedef int (*mca_oob_base_component_send_fn_t)(orte_rml_send_t *msg); typedef char* (*mca_oob_base_component_get_addr_fn_t)(void); typedef int (*mca_oob_base_component_set_addr_fn_t)(orte_process_name_t *peer, char **uris); -typedef bool (*mca_oob_base_component_is_reachable_fn_t)(char *routed, orte_process_name_t *peer); +typedef bool (*mca_oob_base_component_is_reachable_fn_t)(orte_process_name_t *peer); typedef void (*mca_oob_ping_callback_fn_t)(int status, void *cbdata); -typedef orte_rml_pathway_t* (*mca_oob_base_component_query_transports_fn_t)(void); #if OPAL_ENABLE_FT_CR == 1 typedef int (*mca_oob_base_component_ft_event_fn_t)(int state); @@ -75,7 +74,6 @@ typedef struct { mca_oob_base_component_get_addr_fn_t get_addr; mca_oob_base_component_set_addr_fn_t set_addr; mca_oob_base_component_is_reachable_fn_t is_reachable; - mca_oob_base_component_query_transports_fn_t query_transports; #if OPAL_ENABLE_FT_CR == 1 mca_oob_base_component_ft_event_fn_t ft_event; #endif diff --git a/orte/mca/oob/tcp/oob_tcp.c b/orte/mca/oob/tcp/oob_tcp.c index d5f5ce9c55..15f326d277 100644 --- a/orte/mca/oob/tcp/oob_tcp.c +++ b/orte/mca/oob/tcp/oob_tcp.c @@ -13,7 +13,7 @@ * All rights reserved. * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. 
* Copyright (c) 2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -180,7 +180,7 @@ static void send_nb(orte_rml_send_t *msg) /* do we have a route to this peer (could be direct)? */ - hop = orte_routed.get_route(msg->routed, &msg->dst); + hop = orte_routed.get_route(&msg->dst); /* do we know this hop? */ if (NULL == (peer = mca_oob_tcp_peer_lookup(&hop))) { /* push this back to the component so it can try diff --git a/orte/mca/oob/tcp/oob_tcp_component.c b/orte/mca/oob/tcp/oob_tcp_component.c index 85a17e01a6..cdc79cd9e9 100644 --- a/orte/mca/oob/tcp/oob_tcp_component.c +++ b/orte/mca/oob/tcp/oob_tcp_component.c @@ -103,8 +103,7 @@ static int component_send(orte_rml_send_t *msg); static char* component_get_addr(void); static int component_set_addr(orte_process_name_t *peer, char **uris); -static bool component_is_reachable(char *rtmod, orte_process_name_t *peer); -static orte_rml_pathway_t* component_query_transports(void); +static bool component_is_reachable(orte_process_name_t *peer); #if OPAL_ENABLE_FT_CR == 1 static int component_ft_event(int state); #endif @@ -135,7 +134,6 @@ mca_oob_tcp_component_t mca_oob_tcp_component = { .get_addr = component_get_addr, .set_addr = component_set_addr, .is_reachable = component_is_reachable, - .query_transports = component_query_transports, #if OPAL_ENABLE_FT_CR == 1 .ft_event = component_ft_event, #endif @@ -627,37 +625,6 @@ static int component_available(void) return ORTE_SUCCESS; } -static orte_rml_pathway_t* component_query_transports(void) -{ - orte_rml_pathway_t *p; - char *qual; - - /* if neither IPv4 or IPv6 connections are available, then - * we have nothing to support */ - if (NULL == mca_oob_tcp_component.ipv4conns && - NULL == mca_oob_tcp_component.ipv6conns) { - return NULL; - } - - /* if we get here, then we support Ethernet and TCP */ - p = OBJ_NEW(orte_rml_pathway_t); - p->component = strdup("oob"); - orte_set_attribute(&p->attributes, 
ORTE_RML_TRANSPORT_TYPE, ORTE_ATTR_LOCAL, "Ethernet", OPAL_STRING); - orte_set_attribute(&p->attributes, ORTE_RML_PROTOCOL_TYPE, ORTE_ATTR_LOCAL, "TCP", OPAL_STRING); - /* setup our qualifiers - we route communications, may have IPv4 and/or IPv6, etc. */ - if (NULL != mca_oob_tcp_component.ipv4conns && - NULL != mca_oob_tcp_component.ipv6conns) { - qual = "routed=true:ipv4:ipv6"; - } else if (NULL == mca_oob_tcp_component.ipv6conns) { - qual = "routed=true:ipv4"; - } else { - qual = "routed=true:ipv6"; - } - orte_set_attribute(&p->attributes, ORTE_RML_QUALIFIER_ATTRIB, ORTE_ATTR_LOCAL, qual, OPAL_STRING); - - return p; -} - /* Start all modules */ static int component_startup(void) { @@ -1008,12 +975,12 @@ static int component_set_addr(orte_process_name_t *peer, return ORTE_ERR_TAKE_NEXT_OPTION; } -static bool component_is_reachable(char *rtmod, orte_process_name_t *peer) +static bool component_is_reachable(orte_process_name_t *peer) { orte_process_name_t hop; /* if we have a route to this peer, then we can reach it */ - hop = orte_routed.get_route(rtmod, peer); + hop = orte_routed.get_route(peer); if (ORTE_JOBID_INVALID == hop.jobid || ORTE_VPID_INVALID == hop.vpid) { opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output, @@ -1102,7 +1069,7 @@ void mca_oob_tcp_component_lost_connection(int fd, short args, void *cbdata) if (!orte_finalizing) { /* activate the proc state */ - if (ORTE_SUCCESS != orte_routed.route_lost(pop->rtmod, &pop->peer)) { + if (ORTE_SUCCESS != orte_routed.route_lost(&pop->peer)) { ORTE_ACTIVATE_PROC_STATE(&pop->peer, ORTE_PROC_STATE_LIFELINE_LOST); } else { ORTE_ACTIVATE_PROC_STATE(&pop->peer, ORTE_PROC_STATE_COMM_FAILED); @@ -1216,7 +1183,6 @@ void mca_oob_tcp_component_hop_unknown(int fd, short args, void *cbdata) snd->count = mop->snd->hdr.nbytes; snd->cbfunc.iov = NULL; snd->cbdata = NULL; - snd->routed = strdup(mop->snd->hdr.routed); /* activate the OOB send state */ ORTE_OOB_SEND(snd); /* protect the data */ 
@@ -1416,15 +1382,11 @@ OBJ_CLASS_INSTANCE(mca_oob_tcp_addr_t, static void pop_cons(mca_oob_tcp_peer_op_t *pop) { - pop->rtmod = NULL; pop->net = NULL; pop->port = NULL; } static void pop_des(mca_oob_tcp_peer_op_t *pop) { - if (NULL != pop->rtmod) { - free(pop->rtmod); - } if (NULL != pop->net) { free(pop->net); } diff --git a/orte/mca/oob/tcp/oob_tcp_connection.c b/orte/mca/oob/tcp/oob_tcp_connection.c index ff06ec8a97..819d2d77bf 100644 --- a/orte/mca/oob/tcp/oob_tcp_connection.c +++ b/orte/mca/oob/tcp/oob_tcp_connection.c @@ -13,7 +13,7 @@ * All rights reserved. * Copyright (c) 2009-2018 Cisco Systems, Inc. All rights reserved * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. 
@@ -334,7 +334,7 @@ void mca_oob_tcp_peer_try_connect(int fd, short args, void *cbdata) * an event in the component event base, and so it will fire async * from us if we are in our own progress thread */ - ORTE_ACTIVATE_TCP_CMP_OP(peer, NULL, mca_oob_tcp_component_failed_to_connect); + ORTE_ACTIVATE_TCP_CMP_OP(peer, mca_oob_tcp_component_failed_to_connect); /* FIXME: post any messages in the send queue back to the OOB * level for reassignment */ @@ -937,7 +937,7 @@ int mca_oob_tcp_peer_recv_connect_ack(mca_oob_tcp_peer_t* pr, /* set the peer into the component and OOB-level peer tables to indicate * that we know this peer and we will be handling him */ - ORTE_ACTIVATE_TCP_CMP_OP(peer, NULL, mca_oob_tcp_component_set_module); + ORTE_ACTIVATE_TCP_CMP_OP(peer, mca_oob_tcp_component_set_module); /* connected */ tcp_peer_connected(peer); @@ -968,7 +968,7 @@ static void tcp_peer_connected(mca_oob_tcp_peer_t* peer) } /* update the route */ - orte_routed.update_route(NULL, &peer->name, &peer->name); + orte_routed.update_route(&peer->name, &peer->name); /* initiate send of first message on queue */ if (NULL == peer->send_msg) { @@ -1027,7 +1027,7 @@ void mca_oob_tcp_peer_close(mca_oob_tcp_peer_t *peer) /* inform the component-level that we have lost a connection so * it can decide what to do about it. 
*/ - ORTE_ACTIVATE_TCP_CMP_OP(peer, NULL, mca_oob_tcp_component_lost_connection); + ORTE_ACTIVATE_TCP_CMP_OP(peer, mca_oob_tcp_component_lost_connection); if (orte_orteds_term_ordered || orte_finalizing || orte_abnormal_term_ordered) { /* nothing more to do */ @@ -1238,7 +1238,7 @@ bool mca_oob_tcp_peer_accept(mca_oob_tcp_peer_t* peer) /* set the peer into the component and OOB-level peer tables to indicate * that we know this peer and we will be handling him */ - ORTE_ACTIVATE_TCP_CMP_OP(peer, NULL, mca_oob_tcp_component_set_module); + ORTE_ACTIVATE_TCP_CMP_OP(peer, mca_oob_tcp_component_set_module); tcp_peer_connected(peer); if (!peer->recv_ev_active) { diff --git a/orte/mca/oob/tcp/oob_tcp_peer.h b/orte/mca/oob/tcp/oob_tcp_peer.h index 8d04fd4438..9a175e084e 100644 --- a/orte/mca/oob/tcp/oob_tcp_peer.h +++ b/orte/mca/oob/tcp/oob_tcp_peer.h @@ -12,7 +12,7 @@ * Copyright (c) 2006-2013 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2010-2011 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2019 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -73,21 +73,15 @@ typedef struct { uint16_t af_family; char *net; char *port; - char *rtmod; } mca_oob_tcp_peer_op_t; OBJ_CLASS_DECLARATION(mca_oob_tcp_peer_op_t); -#define ORTE_ACTIVATE_TCP_CMP_OP(p, r, cbfunc) \ +#define ORTE_ACTIVATE_TCP_CMP_OP(p, cbfunc) \ do { \ mca_oob_tcp_peer_op_t *pop; \ - char *proxy; \ pop = OBJ_NEW(mca_oob_tcp_peer_op_t); \ pop->peer.jobid = (p)->name.jobid; \ pop->peer.vpid = (p)->name.vpid; \ - proxy = (r); \ - if (NULL != proxy) { \ - pop->rtmod = strdup(proxy); \ - } \ ORTE_THREADSHIFT(pop, orte_oob_base.ev_base, \ (cbfunc), ORTE_MSG_PRI); \ } while(0); diff --git a/orte/mca/oob/tcp/oob_tcp_sendrecv.c b/orte/mca/oob/tcp/oob_tcp_sendrecv.c index 6db0243ed5..c1ee0740f5 100644 --- a/orte/mca/oob/tcp/oob_tcp_sendrecv.c +++ b/orte/mca/oob/tcp/oob_tcp_sendrecv.c @@ -13,7 +13,7 @@ * All rights reserved. * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -586,7 +586,6 @@ void mca_oob_tcp_recv_handler(int sd, short flags, void *cbdata) snd->data = peer->recv_msg->data; snd->seq_num = peer->recv_msg->hdr.seq_num; snd->count = peer->recv_msg->hdr.nbytes; - snd->routed = strdup(peer->recv_msg->hdr.routed); snd->cbfunc.iov = NULL; snd->cbdata = NULL; /* activate the OOB send state */ diff --git a/orte/mca/oob/tcp/oob_tcp_sendrecv.h b/orte/mca/oob/tcp/oob_tcp_sendrecv.h index e906c962a9..1ac1b570fc 100644 --- a/orte/mca/oob/tcp/oob_tcp_sendrecv.h +++ b/orte/mca/oob/tcp/oob_tcp_sendrecv.h @@ -12,7 +12,7 @@ * Copyright (c) 2006-2013 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2010-2018 Cisco Systems, Inc. 
All rights reserved - * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -109,10 +109,6 @@ OBJ_CLASS_DECLARATION(mca_oob_tcp_recv_t); _s->hdr.type = MCA_OOB_TCP_USER; \ _s->hdr.tag = (m)->tag; \ _s->hdr.seq_num = (m)->seq_num; \ - if (NULL != (m)->routed) { \ - (void)opal_string_copy(_s->hdr.routed, (m)->routed, \ - ORTE_MAX_RTD_SIZE); \ - } \ /* point to the actual message */ \ _s->msg = (m); \ /* set the total number of bytes to be sent */ \ @@ -157,10 +153,6 @@ OBJ_CLASS_DECLARATION(mca_oob_tcp_recv_t); _s->hdr.type = MCA_OOB_TCP_USER; \ _s->hdr.tag = (m)->tag; \ _s->hdr.seq_num = (m)->seq_num; \ - if (NULL != (m)->routed) { \ - (void)opal_string_copy(_s->hdr.routed, (m)->routed, \ - ORTE_MAX_RTD_SIZE); \ - } \ /* point to the actual message */ \ _s->msg = (m); \ /* set the total number of bytes to be sent */ \ diff --git a/orte/mca/plm/base/plm_base_launch_support.c b/orte/mca/plm/base/plm_base_launch_support.c index 57f609bfc1..1d9da92aad 100644 --- a/orte/mca/plm/base/plm_base_launch_support.c +++ b/orte/mca/plm/base/plm_base_launch_support.c @@ -179,7 +179,7 @@ void orte_plm_base_daemons_reported(int fd, short args, void *cbdata) orte_ras_base_display_alloc(); } /* ensure we update the routing plan */ - orte_routed.update_routing_plan(NULL); + orte_routed.update_routing_plan(); /* prep the buffer */ OBJ_CONSTRUCT(&buf, opal_buffer_t); @@ -812,8 +812,7 @@ void orte_plm_base_post_launch(int fd, short args, void *cbdata) ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_JOBID_PRINT(jdata->jobid), ORTE_NAME_PRINT(&jdata->originator))); - if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit, - &jdata->originator, answer, + if (0 > (ret = orte_rml.send_buffer_nb(&jdata->originator, answer, ORTE_RML_TAG_LAUNCH_RESP, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(ret); @@ -1346,8 +1345,7 @@ void orte_plm_base_daemon_callback(int status, 
orte_process_name_t* sender, goto CLEANUP; } /* send it */ - orte_rml.send_buffer_nb(orte_mgmt_conduit, - &dname, relay, + orte_rml.send_buffer_nb(&dname, relay, ORTE_RML_TAG_DAEMON, orte_rml_send_callback, NULL); /* we will count this node as completed @@ -2310,7 +2308,7 @@ int orte_plm_base_setup_virtual_machine(orte_job_t *jdata) /* ensure all routing plans are up-to-date - we need this * so we know how to tree-spawn and/or xcast info */ - orte_routed.update_routing_plan(NULL); + orte_routed.update_routing_plan(); } /* mark that the daemon job changed */ diff --git a/orte/mca/plm/base/plm_base_receive.c b/orte/mca/plm/base/plm_base_receive.c index d89a6b9313..c95f203bc7 100644 --- a/orte/mca/plm/base/plm_base_receive.c +++ b/orte/mca/plm/base/plm_base_receive.c @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2011 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * Copyright (c) 2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -278,8 +278,7 @@ void orte_plm_base_recv(int status, orte_process_name_t* sender, } /* send the response back to the sender */ - if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit, - sender, answer, ORTE_RML_TAG_LAUNCH_RESP, + if (0 > (ret = orte_rml.send_buffer_nb(sender, answer, ORTE_RML_TAG_LAUNCH_RESP, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(ret); OBJ_RELEASE(answer); diff --git a/orte/mca/plm/rsh/plm_rsh_module.c b/orte/mca/plm/rsh/plm_rsh_module.c index d913dab6de..ffe0488ea4 100644 --- a/orte/mca/plm/rsh/plm_rsh_module.c +++ b/orte/mca/plm/rsh/plm_rsh_module.c @@ -14,7 +14,7 @@ * reserved. * Copyright (c) 2008-2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2011-2017 IBM Corporation. All rights reserved. - * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. 
+ * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * Copyright (c) 2015-2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -265,7 +265,6 @@ static void rsh_wait_daemon(int sd, short flags, void *cbdata) orte_wait_tracker_t *t2 = (orte_wait_tracker_t*)cbdata; orte_plm_rsh_caddy_t *caddy=(orte_plm_rsh_caddy_t*)t2->cbdata; orte_proc_t *daemon = caddy->daemon; - char *rtmod; if (orte_orteds_term_ordered || orte_abnormal_term_ordered) { /* ignore any such report - it will occur if we left the @@ -290,8 +289,7 @@ static void rsh_wait_daemon(int sd, short flags, void *cbdata) buf = OBJ_NEW(opal_buffer_t); opal_dss.pack(buf, &(daemon->name.vpid), 1, ORTE_VPID); opal_dss.pack(buf, &daemon->exit_code, 1, OPAL_INT); - orte_rml.send_buffer_nb(orte_coll_conduit, - ORTE_PROC_MY_HNP, buf, + orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, buf, ORTE_RML_TAG_REPORT_REMOTE_LAUNCH, orte_rml_send_callback, NULL); /* note that this daemon failed */ @@ -312,8 +310,7 @@ static void rsh_wait_daemon(int sd, short flags, void *cbdata) /* remove it from the routing table to ensure num_routes * returns the correct value */ - rtmod = orte_rml.get_routed(orte_coll_conduit); - orte_routed.route_lost(rtmod, &daemon->name); + orte_routed.route_lost(&daemon->name); /* report that the daemon has failed so we can exit */ ORTE_ACTIVATE_PROC_STATE(&daemon->name, ORTE_PROC_STATE_FAILED_TO_START); } @@ -797,7 +794,6 @@ static int remote_spawn(void) orte_job_t *daemons; opal_list_t coll; orte_namelist_t *child; - char *rtmod; OPAL_OUTPUT_VERBOSE((1, orte_plm_base_framework.framework_output, "%s plm:rsh: remote spawn called", @@ -816,9 +812,8 @@ static int remote_spawn(void) } /* get the updated routing list */ - rtmod = orte_rml.get_routed(orte_coll_conduit); OBJ_CONSTRUCT(&coll, opal_list_t); - orte_routed.get_routing_list(rtmod, &coll); + orte_routed.get_routing_list(&coll); /* if I have no children, just return */ if (0 == 
opal_list_get_size(&coll)) { @@ -913,8 +908,7 @@ cleanup: buf = OBJ_NEW(opal_buffer_t); opal_dss.pack(buf, &target.vpid, 1, ORTE_VPID); opal_dss.pack(buf, &rc, 1, OPAL_INT); - orte_rml.send_buffer_nb(orte_coll_conduit, - ORTE_PROC_MY_HNP, buf, + orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, buf, ORTE_RML_TAG_REPORT_REMOTE_LAUNCH, orte_rml_send_callback, NULL); } @@ -1040,7 +1034,6 @@ static void launch_daemons(int fd, short args, void *cbdata) char *username; int port, *portptr; orte_namelist_t *child; - char *rtmod; ORTE_ACQUIRE_OBJECT(state); @@ -1185,8 +1178,7 @@ static void launch_daemons(int fd, short args, void *cbdata) /* get the updated routing list */ OBJ_CONSTRUCT(&coll, opal_list_t); - rtmod = orte_rml.get_routed(orte_coll_conduit); - orte_routed.get_routing_list(rtmod, &coll); + orte_routed.get_routing_list(&coll); } /* setup the launch */ diff --git a/orte/mca/rml/base/Makefile.am b/orte/mca/rml/base/Makefile.am index 4bbaa83e60..d9a03c8f79 100644 --- a/orte/mca/rml/base/Makefile.am +++ b/orte/mca/rml/base/Makefile.am @@ -11,7 +11,7 @@ # All rights reserved. # Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights # reserved. -# Copyright (c) 2016-2017 Intel, Inc. All rights reserved. +# Copyright (c) 2016-2019 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -26,5 +26,4 @@ headers += \ libmca_rml_la_SOURCES += \ base/rml_base_frame.c \ base/rml_base_contact.c \ - base/rml_base_msg_handlers.c \ - base/rml_base_stubs.c + base/rml_base_msg_handlers.c diff --git a/orte/mca/rml/base/base.h b/orte/mca/rml/base/base.h index 36a8046eea..6683c90876 100644 --- a/orte/mca/rml/base/base.h +++ b/orte/mca/rml/base/base.h @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2007-2014 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. 
* Copyright (c) 2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -65,18 +65,9 @@ ORTE_DECLSPEC int orte_rml_base_select(void); /* * globals that might be needed */ -/* adding element to hold the active modules and components */ -typedef struct { - opal_list_item_t super; - int pri; - orte_rml_component_t *component; -} orte_rml_base_active_t; -OBJ_CLASS_DECLARATION(orte_rml_base_active_t); /* a global struct containing framework-level values */ typedef struct { - opal_list_t actives; /* list to hold the active components */ - opal_pointer_array_t conduits; /* array to hold the open conduits */ opal_list_t posted_recvs; opal_list_t unmatched_msgs; int max_retries; @@ -114,8 +105,6 @@ typedef struct { * transfers */ char *data; - /* routed module to be used */ - char *routed; } orte_rml_send_t; OBJ_CLASS_DECLARATION(orte_rml_send_t); @@ -232,52 +221,11 @@ OBJ_CLASS_DECLARATION(orte_self_send_xfer_t); OBJ_RELEASE(m); \ }while(0); -#define ORTE_RML_INVALID_CHANNEL_NUM UINT32_MAX /* common implementations */ ORTE_DECLSPEC void orte_rml_base_post_recv(int sd, short args, void *cbdata); ORTE_DECLSPEC void orte_rml_base_process_msg(int fd, short flags, void *cbdata); -/* Stub API interfaces to cycle through active plugins */ -int orte_rml_API_ping(orte_rml_conduit_t conduit_id, - const char* contact_info, - const struct timeval* tv); - -int orte_rml_API_send_nb(orte_rml_conduit_t conduit_id, - orte_process_name_t* peer, struct iovec* msg, - int count, orte_rml_tag_t tag, - orte_rml_callback_fn_t cbfunc, void* cbdata); - -int orte_rml_API_send_buffer_nb(orte_rml_conduit_t conduit_id, - orte_process_name_t* peer, - struct opal_buffer_t* buffer, - orte_rml_tag_t tag, - orte_rml_buffer_callback_fn_t cbfunc, - void* cbdata); - -void orte_rml_API_recv_nb(orte_process_name_t* peer, - orte_rml_tag_t tag, - bool persistent, - orte_rml_callback_fn_t cbfunc, - void* cbdata); -void 
orte_rml_API_recv_buffer_nb(orte_process_name_t* peer, - orte_rml_tag_t tag, - bool persistent, - orte_rml_buffer_callback_fn_t cbfunc, - void* cbdata); - -void orte_rml_API_recv_cancel(orte_process_name_t* peer, orte_rml_tag_t tag); - -void orte_rml_API_purge(orte_process_name_t *peer); - -int orte_rml_API_query_transports(opal_list_t *providers); - -orte_rml_conduit_t orte_rml_API_open_conduit(opal_list_t *attributes); - -void orte_rml_API_close_conduit(orte_rml_conduit_t id); - -char* orte_rml_API_get_routed(orte_rml_conduit_t id); - END_C_DECLS #endif /* MCA_RML_BASE_H */ diff --git a/orte/mca/rml/base/rml_base_frame.c b/orte/mca/rml/base/rml_base_frame.c index 157cbe3267..f13e1f70df 100644 --- a/orte/mca/rml/base/rml_base_frame.c +++ b/orte/mca/rml/base/rml_base_frame.c @@ -5,7 +5,7 @@ * Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -40,26 +40,10 @@ /* Initialising stub fns in the global var used by other modules */ -orte_rml_base_API_t orte_rml = { - .ping = orte_rml_API_ping, - .send_nb = orte_rml_API_send_nb, - .send_buffer_nb = orte_rml_API_send_buffer_nb, - .recv_nb = orte_rml_API_recv_nb, - .recv_buffer_nb = orte_rml_API_recv_buffer_nb, - .recv_cancel = orte_rml_API_recv_cancel, - .purge = orte_rml_API_purge, - .query_transports = orte_rml_API_query_transports, - .open_conduit = orte_rml_API_open_conduit, - .close_conduit = orte_rml_API_close_conduit, - .get_routed = orte_rml_API_get_routed -}; +orte_rml_base_module_t orte_rml = {0}; orte_rml_base_t orte_rml_base = {{{0}}}; -orte_rml_component_t *orte_rml_component = NULL; - -static bool selected = false; - static int orte_rml_base_register(mca_base_register_flag_t flags) { orte_rml_base.max_retries = 3; @@ -82,62 +66,9 @@ static int orte_rml_base_register(mca_base_register_flag_t flags) return ORTE_SUCCESS; } -static void cleanup(int sd, short args, void *cbdata) -{ - orte_lock_t *lk = (orte_lock_t*)cbdata; - - ORTE_ACQUIRE_OBJECT(active); - OPAL_LIST_DESTRUCT(&orte_rml_base.posted_recvs); - if (NULL != lk) { - ORTE_POST_OBJECT(lk); - ORTE_WAKEUP_THREAD(lk); - } -} - static int orte_rml_base_close(void) { - orte_lock_t lock; - int idx, total_conduits = opal_pointer_array_get_size(&orte_rml_base.conduits); - orte_rml_base_module_t *mod; - orte_rml_component_t *comp; - - /* cycle thru the conduits opened and call each module's finalize */ - /* The components finalise/close() will be responsible for freeing the module pointers */ - for (idx = 0; idx < total_conduits ; idx++) - { - if( NULL != (mod = (orte_rml_base_module_t*)opal_pointer_array_get_item(&orte_rml_base.conduits,idx))) { - /* close the conduit */ - comp = (orte_rml_component_t*)mod->component; - if (NULL != comp && NULL != comp->close_conduit) { - comp->close_conduit(mod); - } - free(mod); - } - - } - OBJ_DESTRUCT(&orte_rml_base.conduits); - - 
OPAL_LIST_DESTRUCT(&orte_rml_base.actives) - - /* because the RML posted recvs list is in a separate - * async thread for apps, we can't just destruct it here. - * Instead, we push it into that event thread and destruct - * it there */ - if (ORTE_PROC_IS_APP) { - opal_event_t ev; - ORTE_CONSTRUCT_LOCK(&lock); - opal_event_set(orte_event_base, &ev, -1, - OPAL_EV_WRITE, cleanup, (void*)&lock); - opal_event_set_priority(&ev, ORTE_ERROR_PRI); - ORTE_POST_OBJECT(ev); - opal_event_active(&ev, OPAL_EV_WRITE, 1); - ORTE_WAIT_THREAD(&lock); - ORTE_DESTRUCT_LOCK(&lock); - } else { - /* we can call the destruct directly */ - cleanup(0, 0, NULL); - } - + OPAL_LIST_DESTRUCT(&orte_rml_base.posted_recvs); return mca_base_framework_components_close(&orte_rml_base_framework, NULL); } @@ -145,11 +76,8 @@ static int orte_rml_base_open(mca_base_open_flag_t flags) { /* Initialize globals */ /* construct object for holding the active plugin modules */ - OBJ_CONSTRUCT(&orte_rml_base.actives, opal_list_t); OBJ_CONSTRUCT(&orte_rml_base.posted_recvs, opal_list_t); OBJ_CONSTRUCT(&orte_rml_base.unmatched_msgs, opal_list_t); - OBJ_CONSTRUCT(&orte_rml_base.conduits, opal_pointer_array_t); - opal_pointer_array_init(&orte_rml_base.conduits,1,INT16_MAX,1); /* Open up all available components */ return mca_base_framework_components_open(&orte_rml_base_framework, flags); @@ -159,61 +87,28 @@ MCA_BASE_FRAMEWORK_DECLARE(orte, rml, "ORTE Run-Time Messaging Layer", orte_rml_base_register, orte_rml_base_open, orte_rml_base_close, mca_rml_base_static_components, 0); -OBJ_CLASS_INSTANCE(orte_rml_base_active_t, - opal_list_item_t, - NULL, NULL); - /** * Function for ordering the component(plugin) by priority */ int orte_rml_base_select(void) { - mca_base_component_list_item_t *cli=NULL; - orte_rml_component_t *component=NULL; - orte_rml_base_active_t *newmodule, *mod; - bool inserted; + orte_rml_component_t *best_component = NULL; + orte_rml_base_module_t *best_module = NULL; - if (selected) { - return 
ORTE_SUCCESS; - } - selected = true; - - OPAL_LIST_FOREACH(cli, &orte_rml_base_framework.framework_components, mca_base_component_list_item_t ) { - component = (orte_rml_component_t*) cli->cli_component; - - opal_output_verbose(10, orte_rml_base_framework.framework_output, - "orte_rml_base_select: Initializing %s component %s", - component->base.mca_type_name, - component->base.mca_component_name); - - /* add to the list of available components */ - newmodule = OBJ_NEW(orte_rml_base_active_t); - newmodule->pri = component->priority; - newmodule->component = component; - - /* maintain priority order */ - inserted = false; - OPAL_LIST_FOREACH(mod, &orte_rml_base.actives, orte_rml_base_active_t) { - if (newmodule->pri > mod->pri) { - opal_list_insert_pos(&orte_rml_base.actives, - (opal_list_item_t*)mod, &newmodule->super); - inserted = true; - break; - } - } - if (!inserted) { - /* must be lowest priority - add to end */ - opal_list_append(&orte_rml_base.actives, &newmodule->super); - } + /* + * Select the best component + */ + if( OPAL_SUCCESS != mca_base_select("rml", orte_rml_base_framework.framework_output, + &orte_rml_base_framework.framework_components, + (mca_base_module_t **) &best_module, + (mca_base_component_t **) &best_component, NULL) ) { + /* This will only happen if no component was selected */ + /* If we didn't find one to select, that is an error */ + return ORTE_ERROR; } - if (4 < opal_output_get_verbosity(orte_rml_base_framework.framework_output)) { - opal_output(0, "%s: Final rml priorities", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - /* show the prioritized list */ - OPAL_LIST_FOREACH(mod, &orte_rml_base.actives, orte_rml_base_active_t) { - opal_output(0, "\tComponent: %s Priority: %d", mod->component->base.mca_component_name, mod->pri); - } - } + /* Save the winner */ + orte_rml = *best_module; return ORTE_SUCCESS; } @@ -279,17 +174,10 @@ static void send_cons(orte_rml_send_t *ptr) ptr->buffer = NULL; ptr->data = NULL; ptr->seq_num = 0xFFFFFFFF; - 
ptr->routed = NULL; -} -static void send_des(orte_rml_send_t *ptr) -{ - if (NULL != ptr->routed) { - free(ptr->routed); - } } OBJ_CLASS_INSTANCE(orte_rml_send_t, opal_list_item_t, - send_cons, send_des); + send_cons, NULL); static void send_req_cons(orte_rml_send_request_t *ptr) @@ -353,21 +241,3 @@ static void prq_des(orte_rml_recv_request_t *ptr) OBJ_CLASS_INSTANCE(orte_rml_recv_request_t, opal_object_t, prq_cons, prq_des); - -static void pthcons(orte_rml_pathway_t *p) -{ - p->component = NULL; - OBJ_CONSTRUCT(&p->attributes, opal_list_t); - OBJ_CONSTRUCT(&p->transports, opal_list_t); -} -static void pthdes(orte_rml_pathway_t *p) -{ - if (NULL != p->component) { - free(p->component); - } - OPAL_LIST_DESTRUCT(&p->attributes); - OPAL_LIST_DESTRUCT(&p->transports); -} -OBJ_CLASS_INSTANCE(orte_rml_pathway_t, - opal_list_item_t, - pthcons, pthdes); diff --git a/orte/mca/rml/base/rml_base_msg_handlers.c b/orte/mca/rml/base/rml_base_msg_handlers.c index facc1e41c5..4a072d1757 100644 --- a/orte/mca/rml/base/rml_base_msg_handlers.c +++ b/orte/mca/rml/base/rml_base_msg_handlers.c @@ -189,8 +189,7 @@ void orte_rml_base_process_msg(int fd, short flags, void *cbdata) return; } - if (ORTE_SUCCESS != (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - &msg->sender, buffer, + if (ORTE_SUCCESS != (rc = orte_rml.send_buffer_nb(&msg->sender, buffer, ORTE_RML_TAG_NODE_REGEX_REPORT, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(rc); diff --git a/orte/mca/rml/base/rml_base_stubs.c b/orte/mca/rml/base/rml_base_stubs.c deleted file mode 100644 index 00a8e8f0c5..0000000000 --- a/orte/mca/rml/base/rml_base_stubs.c +++ /dev/null @@ -1,333 +0,0 @@ -/* - * Copyright (c) 2004-2011 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014-2017 Intel, Inc. 
All rights reserved. - * Copyright (c) 2015-2017 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" - -#include - -#include "opal/class/opal_list.h" -#include "opal/dss/dss.h" -#include "orte/mca/mca.h" -#include "opal/mca/base/mca_base_component_repository.h" -#include "opal/util/argv.h" -#include "opal/util/output.h" - -#include "orte/mca/rml/rml.h" -#include "orte/mca/state/state.h" -#include "orte/runtime/orte_wait.h" -#include "orte/util/name_fns.h" -#include "orte/util/threads.h" - -#include "orte/mca/rml/base/base.h" - -/* - * The stub API interface functions - */ - -/** Open a conduit - check if the ORTE_RML_INCLUDE_COMP attribute is provided, this is */ -/* a comma seperated list of components, try to open the conduit in this order. */ -/* if the ORTE_RML_INCLUDE_COMP is not provided or this list was not able to open conduit */ -/* call the open_conduit() of the component in priority order to see if they can use the */ -/* attribute to open a conduit. */ -/* Note: The component takes care of checking for duplicate and returning the previously */ -/* opened module* in case of duplicates. Currently we are saving it in a new conduit_id */ -/* even if it is duplicate. 
[ToDo] compare the module* received from component to see if */ -/* already present in array and return the prev conduit_id instead of adding it again to array */ -/* @param[in] attributes The attributes is a list of opal_value_t of type OPAL_STRING */ -orte_rml_conduit_t orte_rml_API_open_conduit(opal_list_t *attributes) -{ - orte_rml_base_active_t *active; - orte_rml_component_t *comp; - orte_rml_base_module_t *mod, *ourmod=NULL; - int rc; - - opal_output_verbose(10,orte_rml_base_framework.framework_output, - "%s rml:base:open_conduit", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - - /* bozo check - you cannot specify both include and exclude */ - if (orte_get_attribute(attributes, ORTE_RML_INCLUDE_COMP_ATTRIB, NULL, OPAL_STRING) && - orte_get_attribute(attributes, ORTE_RML_EXCLUDE_COMP_ATTRIB, NULL, OPAL_STRING)) { - // orte_show_help(); - return ORTE_ERR_NOT_SUPPORTED; - } - - /* cycle thru the actives in priority order and let each one see if they can support this request */ - OPAL_LIST_FOREACH(active, &orte_rml_base.actives, orte_rml_base_active_t) { - comp = (orte_rml_component_t *)active->component; - if (NULL != comp->open_conduit) { - if (NULL != (mod = comp->open_conduit(attributes))) { - opal_output_verbose(2, orte_rml_base_framework.framework_output, - "%s rml:base:open_conduit Component %s provided a conduit", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - active->component->base.mca_component_name); - ourmod = mod; - break; - } - } - } - if (NULL != ourmod) { - /* we got an answer - store this conduit in our array */ - rc = opal_pointer_array_add(&orte_rml_base.conduits, ourmod); - if (rc < 0) { - return ORTE_RML_CONDUIT_INVALID; - } - return rc; - } - /* we get here if nobody could support it */ - ORTE_ERROR_LOG(ORTE_ERR_NOT_SUPPORTED); - return ORTE_RML_CONDUIT_INVALID; -} - - - -/** Shutdown the communication system and clean up resources */ -void orte_rml_API_close_conduit(orte_rml_conduit_t id) -{ - orte_rml_base_module_t *mod; - orte_rml_component_t *comp; 
- - opal_output_verbose(10,orte_rml_base_framework.framework_output, - "%s rml:base:close_conduit(%d)", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (int)id); - - if( NULL != (mod = (orte_rml_base_module_t*)opal_pointer_array_get_item(&orte_rml_base.conduits, id))) { - comp = (orte_rml_component_t*)mod->component; - if (NULL != comp && NULL != comp->close_conduit) { - comp->close_conduit(mod); - } - opal_pointer_array_set_item(&orte_rml_base.conduits, id, NULL); - free(mod); - } -} - - - -/** Ping process for connectivity check */ -int orte_rml_API_ping(orte_rml_conduit_t conduit_id, - const char* contact_info, - const struct timeval* tv) -{ - int rc = ORTE_ERR_UNREACH; - orte_rml_base_module_t *mod; - - opal_output_verbose(10,orte_rml_base_framework.framework_output, - "%s rml:base:ping(conduit-%d)", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),conduit_id); - - /* get the module */ - if (NULL == (mod = (orte_rml_base_module_t*)opal_pointer_array_get_item(&orte_rml_base.conduits, conduit_id))) { - return rc; - } - if (NULL == mod->ping) { - return rc; - } - rc = mod->ping((struct orte_rml_base_module_t*)mod, contact_info, tv); - return rc; -} - - -/** Send non-blocking iovec message through a specific conduit*/ -int orte_rml_API_send_nb(orte_rml_conduit_t conduit_id, - orte_process_name_t* peer, - struct iovec* msg, - int count, - orte_rml_tag_t tag, - orte_rml_callback_fn_t cbfunc, - void* cbdata) -{ - int rc = ORTE_ERR_UNREACH; - orte_rml_base_module_t *mod; - - opal_output_verbose(10,orte_rml_base_framework.framework_output, - "%s rml:base:send_nb() to peer %s through conduit %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(peer),conduit_id); - /* get the module */ - if (NULL == (mod = (orte_rml_base_module_t*)opal_pointer_array_get_item(&orte_rml_base.conduits, conduit_id))) { - return rc; - } - if (NULL == mod->send_nb) { - return rc; - } - rc = mod->send_nb((struct orte_rml_base_module_t*)mod, peer, msg, count, tag, cbfunc, cbdata); - return rc; -} - -/** Send 
non-blocking buffer message */ -int orte_rml_API_send_buffer_nb(orte_rml_conduit_t conduit_id, - orte_process_name_t* peer, - struct opal_buffer_t* buffer, - orte_rml_tag_t tag, - orte_rml_buffer_callback_fn_t cbfunc, - void* cbdata) -{ - int rc = ORTE_ERR_UNREACH; - orte_rml_base_module_t *mod; - - opal_output_verbose(10,orte_rml_base_framework.framework_output, - "%s rml:base:send_buffer_nb() to peer %s through conduit %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(peer),conduit_id); - - /* get the module */ - if (NULL == (mod = (orte_rml_base_module_t*)opal_pointer_array_get_item(&orte_rml_base.conduits, conduit_id))) { - return rc; - } - if (NULL == mod->send_buffer_nb) { - return rc; - } - rc = mod->send_buffer_nb((struct orte_rml_base_module_t*)mod, peer, buffer, tag, cbfunc, cbdata); - return rc; -} - -/** post a receive for an IOV message - this is done - * strictly in the base, and so it does not go to a module */ -void orte_rml_API_recv_nb(orte_process_name_t* peer, - orte_rml_tag_t tag, - bool persistent, - orte_rml_callback_fn_t cbfunc, - void* cbdata) -{ - orte_rml_recv_request_t *req; - - opal_output_verbose(10, orte_rml_base_framework.framework_output, - "%s rml_recv_nb for peer %s tag %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(peer), tag); - - /* push the request into the event base so we can add - * the receive to our list of posted recvs */ - req = OBJ_NEW(orte_rml_recv_request_t); - req->post->buffer_data = false; - req->post->peer.jobid = peer->jobid; - req->post->peer.vpid = peer->vpid; - req->post->tag = tag; - req->post->persistent = persistent; - req->post->cbfunc.iov = cbfunc; - req->post->cbdata = cbdata; - ORTE_THREADSHIFT(req, orte_event_base, orte_rml_base_post_recv, ORTE_MSG_PRI); -} - -/** Receive non-blocking buffer message */ -void orte_rml_API_recv_buffer_nb(orte_process_name_t* peer, - orte_rml_tag_t tag, - bool persistent, - orte_rml_buffer_callback_fn_t cbfunc, - void* cbdata) -{ - 
orte_rml_recv_request_t *req; - - opal_output_verbose(10, orte_rml_base_framework.framework_output, - "%s rml_recv_buffer_nb for peer %s tag %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(peer), tag); - - /* push the request into the event base so we can add - * the receive to our list of posted recvs */ - req = OBJ_NEW(orte_rml_recv_request_t); - req->post->buffer_data = true; - req->post->peer.jobid = peer->jobid; - req->post->peer.vpid = peer->vpid; - req->post->tag = tag; - req->post->persistent = persistent; - req->post->cbfunc.buffer = cbfunc; - req->post->cbdata = cbdata; - ORTE_THREADSHIFT(req, orte_event_base, orte_rml_base_post_recv, ORTE_MSG_PRI); -} - -/** Cancel posted non-blocking receive */ -void orte_rml_API_recv_cancel(orte_process_name_t* peer, orte_rml_tag_t tag) -{ - orte_rml_recv_request_t *req; - - opal_output_verbose(10, orte_rml_base_framework.framework_output, - "%s rml_recv_cancel for peer %s tag %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(peer), tag); - - ORTE_ACQUIRE_OBJECT(orte_event_base_active); - if (!orte_event_base_active) { - /* no event will be processed any more, so simply return. 
*/ - return; - } - - /* push the request into the event base so we can remove - * the receive from our list of posted recvs */ - req = OBJ_NEW(orte_rml_recv_request_t); - req->cancel = true; - req->post->peer.jobid = peer->jobid; - req->post->peer.vpid = peer->vpid; - req->post->tag = tag; - ORTE_THREADSHIFT(req, orte_event_base, orte_rml_base_post_recv, ORTE_MSG_PRI); -} - -/** Purge information */ -void orte_rml_API_purge(orte_process_name_t *peer) -{ - orte_rml_base_module_t *mod; - int i; - - for (i=0; i < orte_rml_base.conduits.size; i++) { - /* get the module */ - if (NULL != (mod = (orte_rml_base_module_t*)opal_pointer_array_get_item(&orte_rml_base.conduits, i))) { - if (NULL != mod->purge) { - mod->purge(peer); - } - } - } -} - - -int orte_rml_API_query_transports(opal_list_t *providers) -{ - - orte_rml_base_active_t *active; - orte_rml_pathway_t *p; - - opal_output_verbose(10,orte_rml_base_framework.framework_output, - "%s rml:base:orte_rml_API_query_transports()", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - - /* cycle thru the actives */ - OPAL_LIST_FOREACH(active, &orte_rml_base.actives, orte_rml_base_active_t) { - if (NULL != active->component->query_transports) { - opal_output_verbose(10,orte_rml_base_framework.framework_output, - "\n calling module: %s->query_transports() \n", - active->component->base.mca_component_name); - if (NULL != (p = active->component->query_transports())) { - /* pass the results across */ - OBJ_RETAIN(p); - opal_list_append(providers, &p->super); - } - } - } - return ORTE_SUCCESS; - -} - -char* orte_rml_API_get_routed(orte_rml_conduit_t id) -{ - orte_rml_base_module_t *mod; - - /* get the module */ - if (NULL != (mod = (orte_rml_base_module_t*)opal_pointer_array_get_item(&orte_rml_base.conduits, id))) { - return mod->routed; - } - - return NULL; -} diff --git a/orte/mca/rml/oob/rml_oob.h b/orte/mca/rml/oob/rml_oob.h index 5958bf5d9e..90ca2285af 100644 --- a/orte/mca/rml/oob/rml_oob.h +++ b/orte/mca/rml/oob/rml_oob.h @@ -14,7 
+14,7 @@ * All rights reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -48,25 +48,19 @@ ORTE_MODULE_DECLSPEC extern orte_rml_component_t mca_rml_oob_component; void orte_rml_oob_fini(struct orte_rml_base_module_t *mod); -int orte_rml_oob_send_nb(struct orte_rml_base_module_t *mod, - orte_process_name_t* peer, +int orte_rml_oob_send_nb(orte_process_name_t* peer, struct iovec* msg, int count, orte_rml_tag_t tag, orte_rml_callback_fn_t cbfunc, void* cbdata); -int orte_rml_oob_send_buffer_nb(struct orte_rml_base_module_t *mod, - orte_process_name_t* peer, +int orte_rml_oob_send_buffer_nb(orte_process_name_t* peer, opal_buffer_t* buffer, orte_rml_tag_t tag, orte_rml_buffer_callback_fn_t cbfunc, void* cbdata); -int orte_rml_oob_ping(struct orte_rml_base_module_t *mod, - const char* uri, - const struct timeval* tv); - END_C_DECLS #endif diff --git a/orte/mca/rml/oob/rml_oob_component.c b/orte/mca/rml/oob/rml_oob_component.c index ee058d2d01..2e1bcf94ce 100644 --- a/orte/mca/rml/oob/rml_oob_component.c +++ b/orte/mca/rml/oob/rml_oob_component.c @@ -13,7 +13,7 @@ * Copyright (c) 2007-2015 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011-2015 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * Copyright (c) 2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -57,9 +57,8 @@ static int rml_oob_open(void); static int rml_oob_close(void); -static orte_rml_base_module_t* open_conduit(opal_list_t *attributes); -static orte_rml_pathway_t* query_transports(void); -static void close_conduit(orte_rml_base_module_t *mod); +static int component_query(mca_base_module_t **module, int *priority); + /** * component definition */ @@ -75,220 +74,121 @@ orte_rml_component_t mca_rml_oob_component = { ORTE_RELEASE_VERSION), .mca_open_component = rml_oob_open, .mca_close_component = rml_oob_close, + .mca_query_component = component_query, }, .data = { /* The component is checkpoint ready */ MCA_BASE_METADATA_PARAM_CHECKPOINT }, - .priority = 5, - .open_conduit = open_conduit, - .query_transports = query_transports, - .close_conduit = close_conduit + .priority = 5 }; /* Local variables */ -static orte_rml_pathway_t pathway; +static void recv_nb(orte_process_name_t* peer, + orte_rml_tag_t tag, + bool persistent, + orte_rml_callback_fn_t cbfunc, + void* cbdata) +{ + orte_rml_recv_request_t *req; + + opal_output_verbose(10, orte_rml_base_framework.framework_output, + "%s rml_recv_nb for peer %s tag %d", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(peer), tag); + + /* push the request into the event base so we can add + * the receive to our list of posted recvs */ + req = OBJ_NEW(orte_rml_recv_request_t); + req->post->buffer_data = false; + req->post->peer.jobid = peer->jobid; + req->post->peer.vpid = peer->vpid; + req->post->tag = tag; + req->post->persistent = persistent; + req->post->cbfunc.iov = cbfunc; + req->post->cbdata = cbdata; + ORTE_THREADSHIFT(req, orte_event_base, orte_rml_base_post_recv, ORTE_MSG_PRI); +} +static void recv_buffer_nb(orte_process_name_t* peer, + orte_rml_tag_t tag, + bool persistent, + orte_rml_buffer_callback_fn_t cbfunc, + void* cbdata) +{ + orte_rml_recv_request_t *req; + + opal_output_verbose(10, orte_rml_base_framework.framework_output, + "%s rml_recv_buffer_nb for peer %s tag %d", + 
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(peer), tag); + + /* push the request into the event base so we can add + * the receive to our list of posted recvs */ + req = OBJ_NEW(orte_rml_recv_request_t); + req->post->buffer_data = true; + req->post->peer.jobid = peer->jobid; + req->post->peer.vpid = peer->vpid; + req->post->tag = tag; + req->post->persistent = persistent; + req->post->cbfunc.buffer = cbfunc; + req->post->cbdata = cbdata; + ORTE_THREADSHIFT(req, orte_event_base, orte_rml_base_post_recv, ORTE_MSG_PRI); +} +static void recv_cancel(orte_process_name_t* peer, orte_rml_tag_t tag) +{ + orte_rml_recv_request_t *req; + + opal_output_verbose(10, orte_rml_base_framework.framework_output, + "%s rml_recv_cancel for peer %s tag %d", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(peer), tag); + + ORTE_ACQUIRE_OBJECT(orte_event_base_active); + if (!orte_event_base_active) { + /* no event will be processed any more, so simply return. */ + return; + } + + /* push the request into the event base so we can remove + * the receive from our list of posted recvs */ + req = OBJ_NEW(orte_rml_recv_request_t); + req->cancel = true; + req->post->peer.jobid = peer->jobid; + req->post->peer.vpid = peer->vpid; + req->post->tag = tag; + ORTE_THREADSHIFT(req, orte_event_base, orte_rml_base_post_recv, ORTE_MSG_PRI); +} +static int oob_ping(const char* uri, const struct timeval* tv) +{ + return ORTE_ERR_UNREACH; +} + static orte_rml_base_module_t base_module = { .component = (struct orte_rml_component_t*)&mca_rml_oob_component, - .ping = NULL, + .ping = oob_ping, .send_nb = orte_rml_oob_send_nb, .send_buffer_nb = orte_rml_oob_send_buffer_nb, + .recv_nb = recv_nb, + .recv_buffer_nb = recv_buffer_nb, + .recv_cancel = recv_cancel, .purge = NULL }; static int rml_oob_open(void) { - /* ask our OOB transports for their info */ - OBJ_CONSTRUCT(&pathway, orte_rml_pathway_t); - pathway.component = strdup("oob"); - ORTE_OOB_GET_TRANSPORTS(&pathway.transports); - /* add any 
component attributes of our own */ - return ORTE_SUCCESS; } static int rml_oob_close(void) { - /* cleanup */ - OBJ_DESTRUCT(&pathway); - return ORTE_SUCCESS; } -static orte_rml_base_module_t* make_module(void) +static int component_query(mca_base_module_t **module, int *priority) { - orte_rml_oob_module_t *mod; - - /* create a new module */ - mod = (orte_rml_oob_module_t*)malloc(sizeof(orte_rml_oob_module_t)); - if (NULL == mod) { - return NULL; - } - - /* copy the APIs over to it */ - memcpy(mod, &base_module, sizeof(base_module)); - - /* initialize its internal storage */ - OBJ_CONSTRUCT(&mod->queued_routing_messages, opal_list_t); - mod->timer_event = NULL; - mod->routed = NULL; - - /* return the result */ - return (orte_rml_base_module_t*)mod; -} - -static orte_rml_base_module_t* open_conduit(opal_list_t *attributes) -{ - char *comp_attrib; - char **comps; - int i; - orte_rml_base_module_t *md; - - opal_output_verbose(20,orte_rml_base_framework.framework_output, - "%s - Entering rml_oob_open_conduit()", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - - /* someone may require this specific component, so look for "oob" */ - comp_attrib = NULL; - if (orte_get_attribute(attributes, ORTE_RML_INCLUDE_COMP_ATTRIB, (void**)&comp_attrib, OPAL_STRING) && - NULL != comp_attrib) { - /* they specified specific components - could be multiple */ - comps = opal_argv_split(comp_attrib, ','); - free(comp_attrib); - for (i=0; NULL != comps[i]; i++) { - if (0 == strcasecmp(comps[i], "oob")) { - /* we are a candidate */ - opal_argv_free(comps); - md = make_module(); - free(comp_attrib); - comp_attrib = NULL; - orte_get_attribute(attributes, ORTE_RML_ROUTED_ATTRIB, (void**)&comp_attrib, OPAL_STRING); - /* the routed system understands a NULL request, so no need to check - * return status/value here */ - md->routed = orte_routed.assign_module(comp_attrib); - if (NULL != comp_attrib) { - free(comp_attrib); - } - return md; - } - } - /* we are not a candidate */ - opal_argv_free(comps); - 
free(comp_attrib); - return NULL; - } - - comp_attrib = NULL; - if (orte_get_attribute(attributes, ORTE_RML_EXCLUDE_COMP_ATTRIB, (void**)&comp_attrib, OPAL_STRING) && - NULL != comp_attrib) { - /* see if we are on the list */ - comps = opal_argv_split(comp_attrib, ','); - free(comp_attrib); - for (i=0; NULL != comps[i]; i++) { - if (0 == strcasecmp(comps[i], "oob")) { - /* we cannot be a candidate */ - opal_argv_free(comps); - free(comp_attrib); - return NULL; - } - } - } - - /* Alternatively, check the attributes to see if we qualify - we only handle - * "Ethernet" and "TCP" */ - comp_attrib = NULL; - if (orte_get_attribute(attributes, ORTE_RML_TRANSPORT_TYPE, (void**)&comp_attrib, OPAL_STRING) && - NULL != comp_attrib) { - comps = opal_argv_split(comp_attrib, ','); - for (i=0; NULL != comps[i]; i++) { - if (0 == strcasecmp(comps[i], "Ethernet") || - 0 == strcasecmp(comps[i], "oob")) { - /* we are a candidate */ - opal_argv_free(comps); - md = make_module(); - free(comp_attrib); - comp_attrib = NULL; - orte_get_attribute(attributes, ORTE_RML_ROUTED_ATTRIB, (void**)&comp_attrib, OPAL_STRING); - /* the routed system understands a NULL request, so no need to check - * return status/value here */ - md->routed = orte_routed.assign_module(comp_attrib); - if (NULL != comp_attrib) { - free(comp_attrib); - } - return md; - } - } - /* we are not a candidate */ - opal_argv_free(comps); - free(comp_attrib); - return NULL; - } - - comp_attrib = NULL; - if (orte_get_attribute(attributes, ORTE_RML_PROTOCOL_TYPE, (void**)&comp_attrib, OPAL_STRING) && - NULL != comp_attrib) { - comps = opal_argv_split(comp_attrib, ','); - for (i=0; NULL != comps[i]; i++) { - if (0 == strcasecmp(comps[i], "TCP")) { - /* we are a candidate */ - opal_argv_free(comps); - md = make_module(); - free(comp_attrib); - comp_attrib = NULL; - orte_get_attribute(attributes, ORTE_RML_ROUTED_ATTRIB, (void**)&comp_attrib, OPAL_STRING); - /* the routed system understands a NULL request, so no need to check - * 
return status/value here */ - md->routed = orte_routed.assign_module(comp_attrib); - if (NULL != comp_attrib) { - free(comp_attrib); - } - return md; - } - } - /* we are not a candidate */ - opal_argv_free(comps); - free(comp_attrib); - return NULL; - } - - /* if they didn't specify a protocol or a transport, then we can be considered */ - if (!orte_get_attribute(attributes, ORTE_RML_TRANSPORT_TYPE, NULL, OPAL_STRING) || - !orte_get_attribute(attributes, ORTE_RML_PROTOCOL_TYPE, NULL, OPAL_STRING)) { - md = make_module(); - md->routed = orte_routed.assign_module(NULL); - return md; - } - - /* if we get here, we cannot handle it */ - return NULL; -} - -static orte_rml_pathway_t* query_transports(void) -{ - /* if we have any available transports, make them available */ - if (0 < opal_list_get_size(&pathway.transports)) { - return &pathway; - } - /* if not, then return NULL */ - return NULL; -} - -static void close_conduit(orte_rml_base_module_t *md) -{ - orte_rml_oob_module_t *mod = (orte_rml_oob_module_t*)md; - - /* cleanup the list of messages */ - OBJ_DESTRUCT(&mod->queued_routing_messages); - - /* clear the storage */ - if (NULL != mod->routed) { - free(mod->routed); - mod->routed = NULL; - } - - /* the rml_base_stub takes care of clearing the base receive - * and free'ng the module */ - return; + *priority = 50; + *module = (mca_base_module_t *) &base_module; + return ORTE_SUCCESS; } diff --git a/orte/mca/rml/oob/rml_oob_send.c b/orte/mca/rml/oob/rml_oob_send.c index 7e5330e944..577354f215 100644 --- a/orte/mca/rml/oob/rml_oob_send.c +++ b/orte/mca/rml/oob/rml_oob_send.c @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -70,8 +70,7 @@ static void send_self_exe(int fd, short args, void* data) OBJ_RELEASE(xfer); } -int orte_rml_oob_send_nb(struct orte_rml_base_module_t *mod, - orte_process_name_t* peer, +int orte_rml_oob_send_nb(orte_process_name_t* peer, struct iovec* iov, int count, orte_rml_tag_t tag, @@ -170,7 +169,6 @@ int orte_rml_oob_send_nb(struct orte_rml_base_module_t *mod, snd->count = count; snd->cbfunc.iov = cbfunc; snd->cbdata = cbdata; - snd->routed = strdup(mod->routed); /* activate the OOB send state */ ORTE_OOB_SEND(snd); @@ -178,8 +176,7 @@ int orte_rml_oob_send_nb(struct orte_rml_base_module_t *mod, return ORTE_SUCCESS; } -int orte_rml_oob_send_buffer_nb(struct orte_rml_base_module_t *mod, - orte_process_name_t* peer, +int orte_rml_oob_send_buffer_nb(orte_process_name_t* peer, opal_buffer_t* buffer, orte_rml_tag_t tag, orte_rml_buffer_callback_fn_t cbfunc, @@ -259,7 +256,6 @@ int orte_rml_oob_send_buffer_nb(struct orte_rml_base_module_t *mod, snd->buffer = buffer; snd->cbfunc.buffer = cbfunc; snd->cbdata = cbdata; - snd->routed = strdup(mod->routed); /* activate the OOB send state */ ORTE_OOB_SEND(snd); diff --git a/orte/mca/rml/rml.h b/orte/mca/rml/rml.h index 2eb1232652..9694add627 100644 --- a/orte/mca/rml/rml.h +++ b/orte/mca/rml/rml.h @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* @@ -56,10 +56,6 @@ BEGIN_C_DECLS /* ******************************************************************** */ -/* forward declare */ -struct orte_rml_base_module_t; -struct orte_rml_component_t; - typedef struct { opal_object_t super; orte_process_name_t name; @@ -173,8 +169,7 @@ typedef void (*orte_rml_exception_callback_t)(orte_process_name_t* peer, * from the local process * @retval ORTE_ERROR An unspecified error occurred during the update */ -typedef int (*orte_rml_module_ping_fn_t)(struct orte_rml_base_module_t *mod, - const char* contact_info, +typedef int (*orte_rml_module_ping_fn_t)(const char* contact_info, const struct timeval* tv); @@ -201,8 +196,7 @@ typedef int (*orte_rml_module_ping_fn_t)(struct orte_rml_base_module_t *mod, * receiving process is not available * @retval ORTE_ERROR An unspecified error occurred */ -typedef int (*orte_rml_module_send_nb_fn_t)(struct orte_rml_base_module_t *mod, - orte_process_name_t* peer, +typedef int (*orte_rml_module_send_nb_fn_t)(orte_process_name_t* peer, struct iovec* msg, int count, orte_rml_tag_t tag, @@ -232,8 +226,7 @@ typedef int (*orte_rml_module_send_nb_fn_t)(struct orte_rml_base_module_t *mod, * receiving process is not available * @retval ORTE_ERROR An unspecified error occurred */ -typedef int (*orte_rml_module_send_buffer_nb_fn_t)(struct orte_rml_base_module_t *mod, - orte_process_name_t* peer, +typedef int (*orte_rml_module_send_buffer_nb_fn_t)(orte_process_name_t* peer, struct opal_buffer_t* buffer, orte_rml_tag_t tag, orte_rml_buffer_callback_fn_t cbfunc, @@ -247,6 +240,49 @@ typedef int (*orte_rml_module_send_buffer_nb_fn_t)(struct orte_rml_base_module_t typedef void (*orte_rml_module_purge_fn_t)(orte_process_name_t *peer); +/** + * Receive an iovec non-blocking message + * + * @param[in] peer Peer process or ORTE_NAME_WILDCARD for wildcard receive + * @param[in] tag User defined tag for matching send/recv + * @param[in] persistent Boolean flag indicating whether or not this is a one-time recv + * 
@param[in] cbfunc Callback function on message completion + * @param[in] cbdata User data to provide during completion callback + */ +typedef void (*orte_rml_module_recv_nb_fn_t)(orte_process_name_t* peer, + orte_rml_tag_t tag, + bool persistent, + orte_rml_callback_fn_t cbfunc, + void* cbdata); + + +/** + * Receive a buffer non-blocking message + * + * @param[in] peer Peer process or ORTE_NAME_WILDCARD for wildcard receive + * @param[in] tag User defined tag for matching send/recv + * @param[in] persistent Boolean flag indicating whether or not this is a one-time recv + * @param[in] cbfunc Callback function on message completion + * @param[in] cbdata User data to provide during completion callback + */ +typedef void (*orte_rml_module_recv_buffer_nb_fn_t)(orte_process_name_t* peer, + orte_rml_tag_t tag, + bool persistent, + orte_rml_buffer_callback_fn_t cbfunc, + void* cbdata); + +/** + * Cancel a posted non-blocking receive + * + * Attempt to cancel a posted non-blocking receive. + * + * @param[in] peer Peer process or ORTE_NAME_WILDCARD, exactly as passed + * to the non-blocking receive call + * @param[in] tag Posted receive tag + */ +typedef void (*orte_rml_module_recv_cancel_fn_t)(orte_process_name_t* peer, + orte_rml_tag_t tag); + /** * RML internal module interface - these will be implemented by all RML components @@ -265,245 +301,21 @@ typedef struct orte_rml_base_module_t { /** Send non-blocking buffer message */ orte_rml_module_send_buffer_nb_fn_t send_buffer_nb; + orte_rml_module_recv_nb_fn_t recv_nb; + orte_rml_module_recv_buffer_nb_fn_t recv_buffer_nb; + orte_rml_module_recv_cancel_fn_t recv_cancel; + /** Purge information */ orte_rml_module_purge_fn_t purge; } orte_rml_base_module_t; -/* ******************************************************************** */ -/* RML PUBLIC MODULE API DEFINITION */ - -/** Open conduit - call each component and see if they can provide a - * conduit that can satisfy all these attributes - return the conduit id - * (a 
negative value indicates error) - */ -typedef orte_rml_conduit_t (*orte_rml_API_open_conduit_fn_t)(opal_list_t *attributes); - -/** - * Close a conduit - allow the component to cleanup. - */ -typedef void (*orte_rml_API_close_conduit_fn_t)(orte_rml_conduit_t id); - -/** - * Query the library to provide all the supported interfaces/transport - * providers in the current node/system. - * - * @param[out] List of providers and their attributes. -*/ -typedef int (*orte_rml_API_query_transports_fn_t)(opal_list_t *transports); - -/* query the routed module for a given conduit */ -typedef char* (*orte_rml_API_query_routed_fn_t)(orte_rml_conduit_t id); - -/** - * "Ping" another process to determine availability - * - * Ping another process to determine if it is available. This - * function only verifies that the process is alive and will allow a - * connection to the local process. It does *not* qualify as - * establishing communication with the remote process, as required by - * the note for set_contact_info(). - * - * @param[in] contact_info The contact info string for the remote process - * @param[in] tv Timeout after which the ping should be failed - * - * @retval ORTE_SUCESS The process is available and will allow connections - * from the local process - * @retval ORTE_ERROR An unspecified error occurred during the update - */ -typedef int (*orte_rml_API_ping_fn_t)(orte_rml_conduit_t conduit_id, - const char* contact_info, - const struct timeval* tv); - - -/** - * Send an iovec non-blocking message - * - * Send an array of iovecs to the specified peer. The call - * will return immediately, although the iovecs may not be modified - * until the completion callback is triggered. The iovecs *may* be - * passed to another call to send_nb before the completion callback is - * triggered. The callback being triggered does not give any - * indication of remote completion. 
- * - * @param[in] peer Name of receiving process - * @param[in] msg Pointer to an array of iovecs to be sent - * @param[in] count Number of iovecs in array - * @param[in] tag User defined tag for matching send/recv - * @param[in] cbfunc Callback function on message comlpetion - * @param[in] cbdata User data to provide during completion callback - * - * @retval ORTE_SUCCESS The message was successfully started - * @retval ORTE_ERR_BAD_PARAM One of the parameters was invalid - * @retval ORTE_ERR_ADDRESSEE_UNKNOWN Contact information for the - * receiving process is not available - * @retval ORTE_ERROR An unspecified error occurred - */ -typedef int (*orte_rml_API_send_nb_fn_t)(orte_rml_conduit_t conduit_id, - orte_process_name_t* peer, - struct iovec* msg, - int count, - orte_rml_tag_t tag, - orte_rml_callback_fn_t cbfunc, - void* cbdata); - - -/** - * Send a buffer non-blocking message - * - * Send a buffer to the specified peer. The call - * will return immediately, although the buffer may not be modified - * until the completion callback is triggered. The buffer *may* be - * passed to another call to send_nb before the completion callback is - * triggered. The callback being triggered does not give any - * indication of remote completion. 
- * - * @param[in] peer Name of receiving process - * @param[in] buffer Pointer to buffer to be sent - * @param[in] tag User defined tag for matching send/recv - * @param[in] cbfunc Callback function on message comlpetion - * @param[in] cbdata User data to provide during completion callback - * - * @retval ORTE_SUCCESS The message was successfully started - * @retval ORTE_ERR_BAD_PARAM One of the parameters was invalid - * @retval ORTE_ERR_ADDRESSEE_UNKNOWN Contact information for the - * receiving process is not available - * @retval ORTE_ERROR An unspecified error occurred - */ -typedef int (*orte_rml_API_send_buffer_nb_fn_t)(orte_rml_conduit_t conduit_id, - orte_process_name_t* peer, - struct opal_buffer_t* buffer, - orte_rml_tag_t tag, - orte_rml_buffer_callback_fn_t cbfunc, - void* cbdata); - -/** - * Purge the RML/OOB of contact info and pending messages - * to/from a specified process. Used when a process aborts - * and is to be restarted - */ -typedef void (*orte_rml_API_purge_fn_t)(orte_process_name_t *peer); - -/** - * Receive an iovec non-blocking message - * - * @param[in] peer Peer process or ORTE_NAME_WILDCARD for wildcard receive - * @param[in] tag User defined tag for matching send/recv - * @param[in] persistent Boolean flag indicating whether or not this is a one-time recv - * @param[in] cbfunc Callback function on message comlpetion - * @param[in] cbdata User data to provide during completion callback - */ -typedef void (*orte_rml_API_recv_nb_fn_t)(orte_process_name_t* peer, - orte_rml_tag_t tag, - bool persistent, - orte_rml_callback_fn_t cbfunc, - void* cbdata); - - -/** - * Receive a buffer non-blocking message - * - * @param[in] peer Peer process or ORTE_NAME_WILDCARD for wildcard receive - * @param[in] tag User defined tag for matching send/recv - * @param[in] persistent Boolean flag indicating whether or not this is a one-time recv - * @param[in] cbfunc Callback function on message comlpetion - * @param[in] cbdata User data to provide during 
completion callback - */ -typedef void (*orte_rml_API_recv_buffer_nb_fn_t)(orte_process_name_t* peer, - orte_rml_tag_t tag, - bool persistent, - orte_rml_buffer_callback_fn_t cbfunc, - void* cbdata); - - -/** - * Cancel a posted non-blocking receive - * - * Attempt to cancel a posted non-blocking receive. - * - * @param[in] peer Peer process or ORTE_NAME_WILDCARD, exactly as passed - * to the non-blocking receive call - * @param[in] tag Posted receive tag - */ -typedef void (*orte_rml_API_recv_cancel_fn_t)(orte_process_name_t* peer, - orte_rml_tag_t tag); - -/** - * RML API interface - */ -typedef struct { - /** Open Conduit **/ - orte_rml_API_open_conduit_fn_t open_conduit; - - /** Shutdown the conduit and clean up resources */ - orte_rml_API_close_conduit_fn_t close_conduit; - - /** Ping process for connectivity check */ - orte_rml_API_ping_fn_t ping; - - /** Send non-blocking iovec message */ - orte_rml_API_send_nb_fn_t send_nb; - - /** Send non-blocking buffer message */ - orte_rml_API_send_buffer_nb_fn_t send_buffer_nb; - - /** Receive non-blocking iovec message */ - orte_rml_API_recv_nb_fn_t recv_nb; - - /** Receive non-blocking buffer message */ - orte_rml_API_recv_buffer_nb_fn_t recv_buffer_nb; - - /** Cancel posted non-blocking receive */ - orte_rml_API_recv_cancel_fn_t recv_cancel; - - /** Purge information */ - orte_rml_API_purge_fn_t purge; - - /** Query information of transport in system */ - orte_rml_API_query_transports_fn_t query_transports; - - /* get the routed module for a given conduit */ - orte_rml_API_query_routed_fn_t get_routed; -} orte_rml_base_API_t; - /** Interface for RML communication */ -ORTE_DECLSPEC extern orte_rml_base_API_t orte_rml; +ORTE_DECLSPEC extern orte_rml_base_module_t orte_rml; /* ******************************************************************** */ /* RML COMPONENT DEFINITION */ -/** - * RML open_conduit - * - * Create an instance (module) of the given RML component. 
Upon - * returning, the module data structure should be fully populated and - * all functions should be usable and will have the conduit information. - * - * @param[in] opal_list_t of all attributes requested for the conduit. - * Each attribute will be key-value. - * [TODO] put in examples of the key-value here. - * @return Exactly one module created by the call to the component's - * initialization function should be returned. The module structure - * should be fully populated, and the priority should be set to a - * reasonable value. - * - * @retval NULL An error occurred and initialization did not occur - * @retval non-NULL The module was successfully initialized - */ -typedef orte_rml_base_module_t* (*orte_rml_component_open_conduit_fn_t)(opal_list_t *attributes); - -/** - * Query the library to provide all the supported interfaces/transport - * providers in the current node/system. - * - */ -typedef orte_rml_pathway_t* (*orte_rml_component_query_transports_fn_t)(void); - -/** Close conduit - allow the specific component to - * cleanup the module for this conduit - */ -typedef void (*orte_rml_module_close_conduit_fn_t)(orte_rml_base_module_t *mod); - /** * RML component interface * @@ -518,10 +330,6 @@ typedef struct orte_rml_component_t { mca_base_component_data_t data; /* Component priority */ int priority; - /* Component interface functions */ - orte_rml_component_open_conduit_fn_t open_conduit; - orte_rml_component_query_transports_fn_t query_transports; - orte_rml_module_close_conduit_fn_t close_conduit; } orte_rml_component_t; diff --git a/orte/mca/rml/rml_types.h b/orte/mca/rml/rml_types.h index 2acb03c1bb..5d14c0e8aa 100644 --- a/orte/mca/rml/rml_types.h +++ b/orte/mca/rml/rml_types.h @@ -12,7 +12,7 @@ * Copyright (c) 2007-2012 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2009-2016 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. 
All rights reserved. * Copyright (c) 2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -201,19 +201,6 @@ BEGIN_C_DECLS */ typedef uint32_t orte_rml_tag_t; -/* Conduit ID */ -typedef uint16_t orte_rml_conduit_t; -#define ORTE_RML_CONDUIT_INVALID 0xff - -/* define an object for reporting transports */ -typedef struct { - opal_list_item_t super; - char *component; - opal_list_t attributes; - opal_list_t transports; -} orte_rml_pathway_t; -OBJ_CLASS_DECLARATION(orte_rml_pathway_t); - /* ******************************************************************** */ diff --git a/orte/mca/routed/base/base.h b/orte/mca/routed/base/base.h index 1f2ce533da..632bb2bb4d 100644 --- a/orte/mca/routed/base/base.h +++ b/orte/mca/routed/base/base.h @@ -1,7 +1,7 @@ /* * Copyright (c) 2007-2013 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -32,38 +32,11 @@ ORTE_DECLSPEC extern mca_base_framework_t orte_routed_base_framework; ORTE_DECLSPEC int orte_routed_base_select(void); typedef struct { - opal_list_item_t super; - int pri; - orte_routed_component_t *component; - orte_routed_module_t *module; -} orte_routed_base_active_t; -OBJ_CLASS_DECLARATION(orte_routed_base_active_t); - -typedef struct { - opal_list_t actives; bool routing_enabled; } orte_routed_base_t; ORTE_DECLSPEC extern orte_routed_base_t orte_routed_base; -/* base API wrapper functions */ -ORTE_DECLSPEC char* orte_routed_base_assign_module(char *modules); - -ORTE_DECLSPEC int orte_routed_base_delete_route(char *module, orte_process_name_t *proc); -ORTE_DECLSPEC int orte_routed_base_update_route(char *module, orte_process_name_t *target, - orte_process_name_t *route); -ORTE_DECLSPEC orte_process_name_t orte_routed_base_get_route(char *module, - orte_process_name_t *target); -ORTE_DECLSPEC int orte_routed_base_route_lost(char *module, - const orte_process_name_t *route); -ORTE_DECLSPEC bool orte_routed_base_route_is_defined(char *module, - const orte_process_name_t *target); -ORTE_DECLSPEC void orte_routed_base_update_routing_plan(char *module); -ORTE_DECLSPEC void orte_routed_base_get_routing_list(char *module, opal_list_t *coll); -ORTE_DECLSPEC int orte_routed_base_set_lifeline(char *module, orte_process_name_t *proc); -ORTE_DECLSPEC size_t orte_routed_base_num_routes(char *module); -ORTE_DECLSPEC int orte_routed_base_ft_event(char *module, int state); - /* specialized support functions */ ORTE_DECLSPEC void orte_routed_base_xcast_routing(opal_list_t *coll, opal_list_t *my_children); diff --git a/orte/mca/routed/base/routed_base_fns.c b/orte/mca/routed/base/routed_base_fns.c index 73d74580a4..99c949ba10 100644 --- a/orte/mca/routed/base/routed_base_fns.c +++ b/orte/mca/routed/base/routed_base_fns.c @@ -12,7 +12,7 @@ * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. 
* Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -37,219 +37,6 @@ #include "orte/mca/routed/base/base.h" -char* orte_routed_base_assign_module(char *modules) -{ - orte_routed_base_active_t *active; - char **desired; - int i; - - /* the incoming param contains a comma-delimited, prioritized - * list of desired routing modules. If it is NULL, then we - * simply return the module at the top of our list */ - if (NULL == modules) { - active = (orte_routed_base_active_t*)opal_list_get_first(&orte_routed_base.actives); - return active->component->base_version.mca_component_name; - } - - /* otherwise, cycle thru the provided list of desired modules - * and pick the highest priority one that matches */ - desired = opal_argv_split(modules, ','); - for (i=0; NULL != desired[i]; i++) { - OPAL_LIST_FOREACH(active, &orte_routed_base.actives, orte_routed_base_active_t) { - if (0 == strcasecmp(desired[i], active->component->base_version.mca_component_name)) { - opal_argv_free(desired); - return active->component->base_version.mca_component_name; - } - } - } - opal_argv_free(desired); - - /* get here if none match */ - return NULL; -} - -int orte_routed_base_delete_route(char *module, orte_process_name_t *proc) -{ - orte_routed_base_active_t *active; - int rc; - - OPAL_LIST_FOREACH(active, &orte_routed_base.actives, orte_routed_base_active_t) { - if (NULL == module || - 0 == strcmp(module, active->component->base_version.mca_component_name)) { - if (NULL != active->module->delete_route) { - if (ORTE_SUCCESS != (rc = active->module->delete_route(proc))) { - return rc; - } - } - } - } - return ORTE_SUCCESS; -} - -int orte_routed_base_update_route(char *module, orte_process_name_t *target, - orte_process_name_t *route) -{ - orte_routed_base_active_t *active; - int rc; - - 
OPAL_LIST_FOREACH(active, &orte_routed_base.actives, orte_routed_base_active_t) { - if (NULL == module || - 0 == strcmp(module, active->component->base_version.mca_component_name)) { - if (NULL != active->module->update_route) { - if (ORTE_SUCCESS != (rc = active->module->update_route(target, route))) { - return rc; - } - } - } - } - return ORTE_SUCCESS; -} - -orte_process_name_t orte_routed_base_get_route(char *module, orte_process_name_t *target) -{ - orte_routed_base_active_t *active; - - /* a NULL module corresponds to direct */ - if (!orte_routed_base.routing_enabled || NULL == module) { - return *target; - } - - OPAL_LIST_FOREACH(active, &orte_routed_base.actives, orte_routed_base_active_t) { - if (0 == strcmp(module, active->component->base_version.mca_component_name)) { - if (NULL != active->module->get_route) { - return active->module->get_route(target); - } - return *ORTE_NAME_INVALID; - } - } - return *ORTE_NAME_INVALID; -} - -int orte_routed_base_route_lost(char *module, const orte_process_name_t *route) -{ - orte_routed_base_active_t *active; - int rc; - - OPAL_LIST_FOREACH(active, &orte_routed_base.actives, orte_routed_base_active_t) { - if (NULL == module || - 0 == strcmp(module, active->component->base_version.mca_component_name)) { - if (NULL != active->module->route_lost) { - if (ORTE_SUCCESS != (rc = active->module->route_lost(route))) { - return rc; - } - } - } - } - return ORTE_SUCCESS; -} - -bool orte_routed_base_route_is_defined(char *module, const orte_process_name_t *target) -{ - orte_routed_base_active_t *active; - - /* a NULL module corresponds to direct */ - if (NULL == module) { - return true; - } - - OPAL_LIST_FOREACH(active, &orte_routed_base.actives, orte_routed_base_active_t) { - if (0 == strcmp(module, active->component->base_version.mca_component_name)) { - if (NULL != active->module->route_is_defined) { - return active->module->route_is_defined(target); - } - break; - } - } - - /* if we didn't find the specified module, or it 
doesn't have - * the required API, then the route isn't defined */ - return false; -} - -void orte_routed_base_update_routing_plan(char *module) -{ - orte_routed_base_active_t *active; - - OPAL_LIST_FOREACH(active, &orte_routed_base.actives, orte_routed_base_active_t) { - if (NULL == module || - 0 == strcmp(module, active->component->base_version.mca_component_name)) { - if (NULL != active->module->update_routing_plan) { - active->module->update_routing_plan(); - } - } - } - - return; -} - -void orte_routed_base_get_routing_list(char *module, opal_list_t *coll) -{ - orte_routed_base_active_t *active; - - OPAL_LIST_FOREACH(active, &orte_routed_base.actives, orte_routed_base_active_t) { - if (NULL == module || - 0 == strcmp(module, active->component->base_version.mca_component_name)) { - if (NULL != active->module->get_routing_list) { - active->module->get_routing_list(coll); - } - } - } - return; -} - -int orte_routed_base_set_lifeline(char *module, orte_process_name_t *proc) -{ - orte_routed_base_active_t *active; - int rc; - - OPAL_LIST_FOREACH(active, &orte_routed_base.actives, orte_routed_base_active_t) { - if (NULL == module || - 0 == strcmp(module, active->component->base_version.mca_component_name)) { - if (NULL != active->module->set_lifeline) { - if (ORTE_SUCCESS != (rc = active->module->set_lifeline(proc))) { - return rc; - } - } - } - } - return ORTE_SUCCESS; -} - -size_t orte_routed_base_num_routes(char *module) -{ - orte_routed_base_active_t *active; - size_t rc = 0; - - OPAL_LIST_FOREACH(active, &orte_routed_base.actives, orte_routed_base_active_t) { - if (NULL == module || - 0 == strcmp(module, active->component->base_version.mca_component_name)) { - if (NULL != active->module->num_routes) { - rc += active->module->num_routes(); - } - } - } - return rc; -} - -int orte_routed_base_ft_event(char *module, int state) -{ - orte_routed_base_active_t *active; - int rc; - - OPAL_LIST_FOREACH(active, &orte_routed_base.actives, orte_routed_base_active_t) { - if 
(NULL == module || - 0 == strcmp(module, active->component->base_version.mca_component_name)) { - if (NULL != active->module->ft_event) { - if (ORTE_SUCCESS != (rc = active->module->ft_event(state))) { - return rc; - } - } - } - } - return ORTE_SUCCESS; -} - - void orte_routed_base_xcast_routing(opal_list_t *coll, opal_list_t *my_children) { orte_routed_tree_t *child; diff --git a/orte/mca/routed/base/routed_base_frame.c b/orte/mca/routed/base/routed_base_frame.c index 5069697975..3c61600a4f 100644 --- a/orte/mca/routed/base/routed_base_frame.c +++ b/orte/mca/routed/base/routed_base_frame.c @@ -10,7 +10,7 @@ * reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2019 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -39,25 +39,11 @@ * component's public mca_base_component_t struct. */ #include "orte/mca/routed/base/static-components.h" -orte_routed_base_t orte_routed_base = {{{0}}}; -orte_routed_API_t orte_routed = { - .assign_module = orte_routed_base_assign_module, - .delete_route = orte_routed_base_delete_route, - .update_route = orte_routed_base_update_route, - .get_route = orte_routed_base_get_route, - .route_lost = orte_routed_base_route_lost, - .route_is_defined = orte_routed_base_route_is_defined, - .set_lifeline = orte_routed_base_set_lifeline, - .update_routing_plan = orte_routed_base_update_routing_plan, - .get_routing_list = orte_routed_base_get_routing_list, - .num_routes = orte_routed_base_num_routes, - .ft_event = orte_routed_base_ft_event -}; +orte_routed_base_t orte_routed_base = {0}; +orte_routed_module_t orte_routed = {0}; static int orte_routed_base_open(mca_base_open_flag_t flags) { - /* setup our list of actives */ - OBJ_CONSTRUCT(&orte_routed_base.actives, opal_list_t); /* start with routing DISABLED */ orte_routed_base.routing_enabled = false; @@ -67,14 
+53,10 @@ static int orte_routed_base_open(mca_base_open_flag_t flags) static int orte_routed_base_close(void) { - orte_routed_base_active_t *active; - - while (NULL != (active = (orte_routed_base_active_t *)opal_list_remove_first(&orte_routed_base.actives))) { - active->module->finalize(); - OBJ_RELEASE(active); + orte_routed_base.routing_enabled = false; + if (NULL != orte_routed.finalize) { + orte_routed.finalize(); } - OPAL_LIST_DESTRUCT(&orte_routed_base.actives); - return mca_base_framework_components_close(&orte_routed_base_framework, NULL); } @@ -82,69 +64,28 @@ MCA_BASE_FRAMEWORK_DECLARE(orte, routed, "ORTE Message Routing Subsystem", NULL, orte_routed_base_open, orte_routed_base_close, mca_routed_base_static_components, 0); -static bool selected = false; - int orte_routed_base_select(void) { - mca_base_component_list_item_t *cli=NULL; - orte_routed_component_t *component=NULL; - orte_routed_base_active_t *newmodule, *mod; - mca_base_module_t *module; - bool inserted; - int pri; + orte_routed_component_t *best_component = NULL; + orte_routed_module_t *best_module = NULL; - if (selected) { - return ORTE_SUCCESS; - } - selected = true; - - OPAL_LIST_FOREACH(cli, &orte_routed_base_framework.framework_components, mca_base_component_list_item_t ) { - component = (orte_routed_component_t*) cli->cli_component; - - opal_output_verbose(10, orte_routed_base_framework.framework_output, - "orte_routed_base_select: Initializing %s component %s", - component->base_version.mca_type_name, - component->base_version.mca_component_name); - - if (ORTE_SUCCESS != component->base_version.mca_query_component(&module, &pri)) { - continue; - } - - /* add to the list of available components */ - newmodule = OBJ_NEW(orte_routed_base_active_t); - newmodule->pri = pri; - newmodule->component = component; - newmodule->module = (orte_routed_module_t*)module; - - if (ORTE_SUCCESS != newmodule->module->initialize()) { - OBJ_RELEASE(newmodule); - continue; - } - - /* maintain priority 
order */ - inserted = false; - OPAL_LIST_FOREACH(mod, &orte_routed_base.actives, orte_routed_base_active_t) { - if (newmodule->pri > mod->pri) { - opal_list_insert_pos(&orte_routed_base.actives, - (opal_list_item_t*)mod, &newmodule->super); - inserted = true; - break; - } - } - if (!inserted) { - /* must be lowest priority - add to end */ - opal_list_append(&orte_routed_base.actives, &newmodule->super); - } + /* + * Select the best component + */ + if( OPAL_SUCCESS != mca_base_select("routed", orte_routed_base_framework.framework_output, + &orte_routed_base_framework.framework_components, + (mca_base_module_t **) &best_module, + (mca_base_component_t **) &best_component, NULL) ) { + /* This will only happen if no component was selected */ + /* If we didn't find one to select, that is an error */ + return ORTE_ERROR; } - if (4 < opal_output_get_verbosity(orte_routed_base_framework.framework_output)) { - opal_output(0, "%s: Final routed priorities", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - /* show the prioritized list */ - OPAL_LIST_FOREACH(mod, &orte_routed_base.actives, orte_routed_base_active_t) { - opal_output(0, "\tComponent: %s Priority: %d", mod->component->base_version.mca_component_name, mod->pri); - } + /* Save the winner */ + orte_routed = *best_module; + if (NULL != orte_routed.initialize) { + orte_routed.initialize(); } - return ORTE_SUCCESS; } @@ -160,7 +101,3 @@ static void destruct(orte_routed_tree_t *rt) OBJ_CLASS_INSTANCE(orte_routed_tree_t, opal_list_item_t, construct, destruct); - -OBJ_CLASS_INSTANCE(orte_routed_base_active_t, - opal_list_item_t, - NULL, NULL); diff --git a/orte/mca/routed/radix/routed_radix.c b/orte/mca/routed/radix/routed_radix.c index 670189014c..31c59372cf 100644 --- a/orte/mca/routed/radix/routed_radix.c +++ b/orte/mca/routed/radix/routed_radix.c @@ -6,7 +6,7 @@ * reserved. * Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. 
+ * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -223,12 +223,6 @@ static orte_process_name_t get_route(orte_process_name_t *target) goto found; } - /* if I am an application process, always route via my local daemon */ - if (ORTE_PROC_IS_APP) { - ret = ORTE_PROC_MY_DAEMON; - goto found; - } - /* if I am a tool, the route is direct if target is in * my own job family, and to the target's HNP if not */ @@ -264,7 +258,13 @@ static orte_process_name_t get_route(orte_process_name_t *target) } } - /* if the jobid is different than our own, then this the target + /* if the target is our parent, then send it direct */ + if (OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, ORTE_PROC_MY_PARENT, target)) { + ret = ORTE_PROC_MY_PARENT; + goto found; + } + + /* if the jobid is different than our own, then this target * is a tool and we should go direct */ if (ORTE_JOB_FAMILY(target->jobid) != ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid)) { ret = target; @@ -273,10 +273,15 @@ static orte_process_name_t get_route(orte_process_name_t *target) daemon.jobid = ORTE_PROC_MY_NAME->jobid; /* find out what daemon hosts this proc */ - if (ORTE_VPID_INVALID == (daemon.vpid = orte_get_proc_daemon_vpid(target))) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - ret = ORTE_NAME_INVALID; - goto found; + if (ORTE_PROC_MY_NAME->jobid == target->jobid) { + /* it's a daemon - no need to look it up */ + daemon.vpid = target->vpid; + } else { + if (ORTE_VPID_INVALID == (daemon.vpid = orte_get_proc_daemon_vpid(target))) { + ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); + ret = ORTE_NAME_INVALID; + goto found; + } } /* if the daemon is me, then send direct to the target! 
*/ diff --git a/orte/mca/routed/routed.h b/orte/mca/routed/routed.h index cc8c800619..68aff352fa 100644 --- a/orte/mca/routed/routed.h +++ b/orte/mca/routed/routed.h @@ -7,7 +7,7 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -221,47 +221,8 @@ typedef struct { orte_routed_module_ft_event_fn_t ft_event; } orte_routed_module_t; -/* define an equivalent set of API functions - these will be implemented - * as "stubs" in the framework base */ -typedef char* (*orte_routed_API_assign_module_fn_t)(char *modules); - -typedef int (*orte_routed_API_delete_route_fn_t)(char *module, - orte_process_name_t *proc); -typedef int (*orte_routed_API_update_route_fn_t)(char *module, - orte_process_name_t *target, - orte_process_name_t *route); -typedef orte_process_name_t (*orte_routed_API_get_route_fn_t)(char *module, - orte_process_name_t *target); -typedef int (*orte_routed_API_route_lost_fn_t)(char *module, - const orte_process_name_t *route); -typedef bool (*orte_routed_API_route_is_defined_fn_t)(char *module, - const orte_process_name_t *target); -typedef void (*orte_routed_API_update_routing_plan_fn_t)(char *module); -typedef void (*orte_routed_API_get_routing_list_fn_t)(char *module, opal_list_t *coll); -typedef int (*orte_routed_API_set_lifeline_fn_t)(char *module, orte_process_name_t *proc); -typedef size_t (*orte_routed_API_num_routes_fn_t)(char *module); -typedef int (*orte_routed_API_ft_event_fn_t)(char *module, int state); - - -typedef struct { - /* API functions */ - orte_routed_API_assign_module_fn_t assign_module; - orte_routed_API_delete_route_fn_t delete_route; - orte_routed_API_update_route_fn_t update_route; - orte_routed_API_get_route_fn_t get_route; - orte_routed_API_route_lost_fn_t route_lost; - 
orte_routed_API_route_is_defined_fn_t route_is_defined; - orte_routed_API_set_lifeline_fn_t set_lifeline; - /* fns for daemons */ - orte_routed_API_update_routing_plan_fn_t update_routing_plan; - orte_routed_API_get_routing_list_fn_t get_routing_list; - orte_routed_API_num_routes_fn_t num_routes; - /* FT Notification */ - orte_routed_API_ft_event_fn_t ft_event; -} orte_routed_API_t; - /* provide an interface to the routed framework stub functions */ -ORTE_DECLSPEC extern orte_routed_API_t orte_routed; +ORTE_DECLSPEC extern orte_routed_module_t orte_routed; /* ******************************************************************** */ diff --git a/orte/mca/snapc/base/snapc_base_fns.c b/orte/mca/snapc/base/snapc_base_fns.c index a7a0f98ec7..3217b6dd4e 100644 --- a/orte/mca/snapc/base/snapc_base_fns.c +++ b/orte/mca/snapc/base/snapc_base_fns.c @@ -9,7 +9,7 @@ * All rights reserved. * Copyright (c) 2007 Evergrid, Inc. All rights reserved. * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -767,8 +767,7 @@ int orte_snapc_base_global_coord_ckpt_update_cmd(orte_process_name_t* peer, } } - if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit, - peer, loc_buffer, + if (0 > (ret = orte_rml.send_buffer_nb(peer, loc_buffer, ORTE_RML_TAG_CKPT, orte_rml_send_callback, NULL))) { opal_output(orte_snapc_base_framework.framework_output, diff --git a/orte/mca/state/base/state_base_fns.c b/orte/mca/state/base/state_base_fns.c index accda8edd3..3aa93c5c6c 100644 --- a/orte/mca/state/base/state_base_fns.c +++ b/orte/mca/state/base/state_base_fns.c @@ -1,6 +1,6 @@ /* * Copyright (c) 2011-2012 Los Alamos National Security, LLC. - * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. 
* Copyright (c) 2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -508,8 +508,7 @@ void orte_state_base_notify_data_server(orte_process_name_t *target) } /* send the request to the server */ - rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - &orte_pmix_server_globals.server, buf, + rc = orte_rml.send_buffer_nb(&orte_pmix_server_globals.server, buf, ORTE_RML_TAG_DATA_SERVER, orte_rml_send_callback, NULL); if (ORTE_SUCCESS != rc) { @@ -617,8 +616,7 @@ static void _send_notification(int status, ORTE_ERROR_NAME(status), ORTE_NAME_PRINT(target), ORTE_NAME_PRINT(&daemon)); - if (ORTE_SUCCESS != (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - &daemon, buf, + if (ORTE_SUCCESS != (rc = orte_rml.send_buffer_nb(&daemon, buf, ORTE_RML_TAG_NOTIFICATION, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(rc); @@ -635,7 +633,6 @@ void orte_state_base_track_procs(int fd, short argc, void *cbdata) orte_job_t *jdata; orte_proc_t *pdata; int i; - char *rtmod; orte_process_name_t parent, target; ORTE_ACQUIRE_OBJECT(caddy); @@ -648,9 +645,6 @@ void orte_state_base_track_procs(int fd, short argc, void *cbdata) ORTE_NAME_PRINT(proc), orte_proc_state_to_str(state)); - /* get our "lifeline" routed module */ - rtmod = orte_rml.get_routed(orte_mgmt_conduit); - /* get the job object for this proc */ if (NULL == (jdata = orte_get_job_data_object(proc->jobid))) { ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); @@ -722,7 +716,7 @@ void orte_state_base_track_procs(int fd, short argc, void *cbdata) * remain (might be some from another job) */ if (orte_orteds_term_ordered && - 0 == orte_routed.num_routes(rtmod)) { + 0 == orte_routed.num_routes()) { for (i=0; i < orte_local_children->size; i++) { if (NULL != (pdata = (orte_proc_t*)opal_pointer_array_get_item(orte_local_children, i)) && ORTE_FLAG_TEST(pdata, ORTE_PROC_FLAG_ALIVE)) { @@ -783,7 +777,6 @@ void orte_state_base_check_all_complete(int fd, short args, void *cbdata) int32_t i32, *i32ptr; 
uint32_t u32; void *nptr; - char *rtmod; ORTE_ACQUIRE_OBJECT(caddy); jdata = caddy->jdata; @@ -793,10 +786,6 @@ void orte_state_base_check_all_complete(int fd, short args, void *cbdata) ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (NULL == jdata) ? "NULL" : ORTE_JOBID_PRINT(jdata->jobid)); - /* get our "lifeline" routed module */ - rtmod = orte_rml.get_routed(orte_mgmt_conduit); - - if (NULL == jdata || jdata->jobid == ORTE_PROC_MY_NAME->jobid) { /* just check to see if the daemons are complete */ OPAL_OUTPUT_VERBOSE((2, orte_state_base_framework.framework_output, @@ -864,7 +853,7 @@ void orte_state_base_check_all_complete(int fd, short args, void *cbdata) */ CHECK_DAEMONS: if (jdata == NULL || jdata->jobid == ORTE_PROC_MY_NAME->jobid) { - if (0 == orte_routed.num_routes(rtmod)) { + if (0 == orte_routed.num_routes()) { /* orteds are done! */ OPAL_OUTPUT_VERBOSE((2, orte_state_base_framework.framework_output, "%s orteds complete - exiting", diff --git a/orte/mca/state/hnp/state_hnp.c b/orte/mca/state/hnp/state_hnp.c index 2d3e520042..4a8855a944 100644 --- a/orte/mca/state/hnp/state_hnp.c +++ b/orte/mca/state/hnp/state_hnp.c @@ -1,7 +1,7 @@ /* * Copyright (c) 2011-2012 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * Copyright (c) 2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -326,8 +326,7 @@ static void _send_notification(int status, ORTE_ERROR_NAME(status), ORTE_NAME_PRINT(target), ORTE_NAME_PRINT(&daemon)); - if (ORTE_SUCCESS != (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - &daemon, buf, + if (ORTE_SUCCESS != (rc = orte_rml.send_buffer_nb(&daemon, buf, ORTE_RML_TAG_NOTIFICATION, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(rc); diff --git a/orte/mca/state/orted/state_orted.c b/orte/mca/state/orted/state_orted.c index dfd05fea31..8846c90a1f 100644 --- a/orte/mca/state/orted/state_orted.c +++ b/orte/mca/state/orted/state_orted.c @@ -1,7 +1,7 @@ /* * Copyright (c) 2011-2017 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -239,8 +239,7 @@ static void track_jobs(int fd, short argc, void *cbdata) } /* send it */ - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - ORTE_PROC_MY_HNP, alert, + if (0 > (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, alert, ORTE_RML_TAG_PLM, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(rc); @@ -262,7 +261,6 @@ static void track_procs(int fd, short argc, void *cbdata) opal_buffer_t *alert; int rc, i; orte_plm_cmd_flag_t cmd; - char *rtmod; orte_std_cntr_t index; orte_job_map_t *map; orte_node_t *node; @@ -333,8 +331,7 @@ static void track_procs(int fd, short argc, void *cbdata) } } /* send it */ - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - ORTE_PROC_MY_HNP, alert, + if (0 > (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, alert, ORTE_RML_TAG_PLM, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(rc); @@ -391,9 +388,8 @@ static void track_procs(int fd, short argc, void *cbdata) * gone, then terminate ourselves IF no local procs * remain (might be some from another job) */ - rtmod = orte_rml.get_routed(orte_mgmt_conduit); if (orte_orteds_term_ordered && - 0 == 
orte_routed.num_routes(rtmod)) { + 0 == orte_routed.num_routes()) { for (i=0; i < orte_local_children->size; i++) { if (NULL != (pdata = (orte_proc_t*)opal_pointer_array_get_item(orte_local_children, i)) && ORTE_FLAG_TEST(pdata, ORTE_PROC_FLAG_ALIVE)) { @@ -431,8 +427,7 @@ static void track_procs(int fd, short argc, void *cbdata) "%s state:orted: SENDING JOB LOCAL TERMINATION UPDATE FOR JOB %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_JOBID_PRINT(jdata->jobid))); - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - ORTE_PROC_MY_HNP, alert, + if (0 > (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, alert, ORTE_RML_TAG_PLM, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(rc); diff --git a/orte/orted/orted_comm.c b/orte/orted/orted_comm.c index b07e86e6ab..803a3d94aa 100644 --- a/orte/orted/orted_comm.c +++ b/orte/orted/orted_comm.c @@ -123,7 +123,6 @@ void orte_daemon_recv(int status, orte_process_name_t* sender, char string[256], *string_ptr = string; float pss; opal_pstats_t pstat; - char *rtmod; char *coprocessors; orte_job_map_t *map; int8_t flag; @@ -382,8 +381,7 @@ void orte_daemon_recv(int status, orte_process_name_t* sender, /* flag that orteds were ordered to terminate */ orte_orteds_term_ordered = true; /* if all my routes and local children are gone, then terminate ourselves */ - rtmod = orte_rml.get_routed(orte_mgmt_conduit); - if (0 == (ret = orte_routed.num_routes(rtmod))) { + if (0 == (ret = orte_routed.num_routes())) { for (i=0; i < orte_local_children->size; i++) { if (NULL != (proct = (orte_proc_t*)opal_pointer_array_get_item(orte_local_children, i)) && ORTE_FLAG_TEST(proct, ORTE_PROC_FLAG_ALIVE)) { @@ -425,8 +423,7 @@ void orte_daemon_recv(int status, orte_process_name_t* sender, orte_orteds_term_ordered = true; if (ORTE_PROC_IS_HNP) { /* if all my routes and local children are gone, then terminate ourselves */ - rtmod = orte_rml.get_routed(orte_mgmt_conduit); - if (0 == orte_routed.num_routes(rtmod)) { + if (0 == orte_routed.num_routes()) 
{ for (i=0; i < orte_local_children->size; i++) { if (NULL != (proct = (orte_proc_t*)opal_pointer_array_get_item(orte_local_children, i)) && ORTE_FLAG_TEST(proct, ORTE_PROC_FLAG_ALIVE)) { @@ -526,8 +523,7 @@ void orte_daemon_recv(int status, orte_process_name_t* sender, break; } /* send the buffer to our IOF */ - orte_rml.send_buffer_nb(orte_mgmt_conduit, - ORTE_PROC_MY_NAME, iofbuf, ORTE_RML_TAG_IOF_HNP, + orte_rml.send_buffer_nb(ORTE_PROC_MY_NAME, iofbuf, ORTE_RML_TAG_IOF_HNP, orte_rml_send_callback, NULL); } for (i=1; i < orte_node_pool->size; i++) { @@ -718,8 +714,7 @@ void orte_daemon_recv(int status, orte_process_name_t* sender, OBJ_DESTRUCT(&data); } /* send the data */ - if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit, - sender, answer, ORTE_RML_TAG_TOPOLOGY_REPORT, + if (0 > (ret = orte_rml.send_buffer_nb(sender, answer, ORTE_RML_TAG_TOPOLOGY_REPORT, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(ret); OBJ_RELEASE(answer); @@ -749,8 +744,7 @@ void orte_daemon_recv(int status, orte_process_name_t* sender, goto CLEANUP; } - if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit, - sender, answer, ORTE_RML_TAG_TOOL, + if (0 > (ret = orte_rml.send_buffer_nb(sender, answer, ORTE_RML_TAG_TOOL, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(ret); OBJ_RELEASE(answer); @@ -775,8 +769,7 @@ void orte_daemon_recv(int status, orte_process_name_t* sender, OBJ_RELEASE(answer); goto CLEANUP; } - if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit, - sender, answer, ORTE_RML_TAG_TOOL, + if (0 > (ret = orte_rml.send_buffer_nb(sender, answer, ORTE_RML_TAG_TOOL, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(ret); OBJ_RELEASE(answer); @@ -845,8 +838,7 @@ void orte_daemon_recv(int status, orte_process_name_t* sender, rc = opal_hash_table_get_next_key_uint32(orte_job_data, &u32, (void **)&jobdat, nptr, &nptr); } } - if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit, - sender, answer, ORTE_RML_TAG_TOOL, + if (0 > (ret = 
orte_rml.send_buffer_nb(sender, answer, ORTE_RML_TAG_TOOL, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(ret); OBJ_RELEASE(answer); @@ -872,8 +864,7 @@ void orte_daemon_recv(int status, orte_process_name_t* sender, OBJ_RELEASE(answer); goto CLEANUP; } - if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit, - sender, answer, ORTE_RML_TAG_TOOL, + if (0 > (ret = orte_rml.send_buffer_nb(sender, answer, ORTE_RML_TAG_TOOL, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(ret); OBJ_RELEASE(answer); @@ -942,8 +933,7 @@ void orte_daemon_recv(int status, orte_process_name_t* sender, } } /* send the info */ - if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit, - sender, answer, ORTE_RML_TAG_TOOL, + if (0 > (ret = orte_rml.send_buffer_nb(sender, answer, ORTE_RML_TAG_TOOL, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(ret); OBJ_RELEASE(answer); @@ -969,8 +959,7 @@ void orte_daemon_recv(int status, orte_process_name_t* sender, OBJ_RELEASE(answer); goto CLEANUP; } - if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit, - sender, answer, ORTE_RML_TAG_TOOL, + if (0 > (ret = orte_rml.send_buffer_nb(sender, answer, ORTE_RML_TAG_TOOL, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(ret); OBJ_RELEASE(answer); @@ -1088,8 +1077,7 @@ void orte_daemon_recv(int status, orte_process_name_t* sender, } } /* send the info */ - if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit, - sender, answer, ORTE_RML_TAG_TOOL, + if (0 > (ret = orte_rml.send_buffer_nb(sender, answer, ORTE_RML_TAG_TOOL, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(ret); OBJ_RELEASE(answer); @@ -1147,8 +1135,7 @@ void orte_daemon_recv(int status, orte_process_name_t* sender, goto SEND_TOP_ANSWER; } /* the callback function will release relay_msg buffer */ - if (0 > orte_rml.send_buffer_nb(orte_mgmt_conduit, - &proc2, relay_msg, + if (0 > orte_rml.send_buffer_nb(&proc2, relay_msg, ORTE_RML_TAG_DAEMON, orte_rml_send_callback, NULL)) { ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); @@ -1199,8 +1186,7 @@ 
void orte_daemon_recv(int status, orte_process_name_t* sender, goto SEND_TOP_ANSWER; } /* the callback function will release relay_msg buffer */ - if (0 > orte_rml.send_buffer_nb(orte_mgmt_conduit, - &proc2, relay_msg, + if (0 > orte_rml.send_buffer_nb(&proc2, relay_msg, ORTE_RML_TAG_DAEMON, orte_rml_send_callback, NULL)) { ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); @@ -1264,8 +1250,7 @@ void orte_daemon_recv(int status, orte_process_name_t* sender, ret = ORTE_ERR_COMM_FAILURE; break; } - if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit, - return_addr, answer, ORTE_RML_TAG_TOOL, + if (0 > (ret = orte_rml.send_buffer_nb(return_addr, answer, ORTE_RML_TAG_TOOL, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(ret); OBJ_RELEASE(answer); @@ -1343,8 +1328,7 @@ void orte_daemon_recv(int status, orte_process_name_t* sender, free(gstack_exec); } /* always send our response */ - if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit, - ORTE_PROC_MY_HNP, answer, + if (0 > (ret = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, answer, ORTE_RML_TAG_STACK_TRACE, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(ret); @@ -1382,8 +1366,7 @@ void orte_daemon_recv(int status, orte_process_name_t* sender, } opal_dss.pack(answer, &pss, 1, OPAL_FLOAT); /* send it back */ - if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit, - ORTE_PROC_MY_HNP, answer, + if (0 > (ret = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, answer, ORTE_RML_TAG_MEMPROFILE, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(ret); diff --git a/orte/orted/orted_main.c b/orte/orted/orted_main.c index 197828aa5d..e30fc1b09a 100644 --- a/orte/orted/orted_main.c +++ b/orte/orted/orted_main.c @@ -712,19 +712,19 @@ int orte_daemon(int argc, char *argv[]) /* tell the routed module that we have a path * back to the HNP */ - if (ORTE_SUCCESS != (ret = orte_routed.update_route(NULL, ORTE_PROC_MY_HNP, ORTE_PROC_MY_PARENT))) { + if (ORTE_SUCCESS != (ret = orte_routed.update_route(ORTE_PROC_MY_HNP, ORTE_PROC_MY_PARENT))) { 
ORTE_ERROR_LOG(ret); goto DONE; } /* and a path to our parent */ - if (ORTE_SUCCESS != (ret = orte_routed.update_route(NULL, ORTE_PROC_MY_PARENT, ORTE_PROC_MY_PARENT))) { + if (ORTE_SUCCESS != (ret = orte_routed.update_route(ORTE_PROC_MY_PARENT, ORTE_PROC_MY_PARENT))) { ORTE_ERROR_LOG(ret); goto DONE; } /* set the lifeline to point to our parent so that we * can handle the situation if that lifeline goes away */ - if (ORTE_SUCCESS != (ret = orte_routed.set_lifeline(NULL, ORTE_PROC_MY_PARENT))) { + if (ORTE_SUCCESS != (ret = orte_routed.set_lifeline(ORTE_PROC_MY_PARENT))) { ORTE_ERROR_LOG(ret); goto DONE; } @@ -754,8 +754,7 @@ int orte_daemon(int argc, char *argv[]) node_regex_waiting = true; orte_rml.recv_buffer_nb(ORTE_PROC_MY_PARENT, ORTE_RML_TAG_NODE_REGEX_REPORT, ORTE_RML_PERSISTENT, node_regex_report, &node_regex_waiting); - if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit, - ORTE_PROC_MY_PARENT, buffer, + if (0 > (ret = orte_rml.send_buffer_nb(ORTE_PROC_MY_PARENT, buffer, ORTE_RML_TAG_WARMUP_CONNECTION, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(ret); @@ -955,8 +954,7 @@ int orte_daemon(int argc, char *argv[]) } /* send it to the designated target */ - if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit, - &target, buffer, + if (0 > (ret = orte_rml.send_buffer_nb(&target, buffer, ORTE_RML_TAG_ORTED_CALLBACK, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(ret); @@ -1140,19 +1138,16 @@ static void rollup(int status, orte_process_name_t* sender, } static void report_orted() { - char *rtmod; int nreqd, ret; /* get the number of children */ - rtmod = orte_rml.get_routed(orte_mgmt_conduit); - nreqd = orte_routed.num_routes(rtmod) + 1; + nreqd = orte_routed.num_routes() + 1; if (nreqd == ncollected && NULL != mybucket && !node_regex_waiting) { /* add the collection of our children's buckets to ours */ opal_dss.copy_payload(mybucket, bucket); OBJ_RELEASE(bucket); /* relay this on to our parent */ - if (0 > (ret = 
orte_rml.send_buffer_nb(orte_mgmt_conduit, - ORTE_PROC_MY_PARENT, mybucket, + if (0 > (ret = orte_rml.send_buffer_nb(ORTE_PROC_MY_PARENT, mybucket, ORTE_RML_TAG_ORTED_CALLBACK, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(ret); @@ -1175,7 +1170,7 @@ static void node_regex_report(int status, orte_process_name_t* sender, /* update the routing tree so any tree spawn operation * properly gets the number of children underneath us */ - orte_routed.update_routing_plan(NULL); + orte_routed.update_routing_plan(); *active = false; diff --git a/orte/orted/orted_submit.c b/orte/orted/orted_submit.c index 3e74eb7206..022fcc89d7 100644 --- a/orte/orted/orted_submit.c +++ b/orte/orted/orted_submit.c @@ -14,7 +14,7 @@ * Copyright (c) 2007-2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2007-2017 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2015-2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. 
@@ -575,14 +575,14 @@ int orte_submit_init(int argc, char *argv[], OBJ_DESTRUCT(&val); /* set the route to be direct */ - if (ORTE_SUCCESS != orte_routed.update_route(NULL, ORTE_PROC_MY_HNP, ORTE_PROC_MY_HNP)) { + if (ORTE_SUCCESS != orte_routed.update_route(ORTE_PROC_MY_HNP, ORTE_PROC_MY_HNP)) { orte_show_help("help-orte-top.txt", "orte-top:hnp-uri-bad", true, orte_process_info.my_hnp_uri); orte_finalize(); exit(1); } /* set the target hnp as our lifeline so we will terminate if it exits */ - orte_routed.set_lifeline(NULL, ORTE_PROC_MY_HNP); + orte_routed.set_lifeline(ORTE_PROC_MY_HNP); /* setup to listen for HNP response to my commands */ orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_NOTIFY_COMPLETE, @@ -700,8 +700,7 @@ int orte_submit_cancel(int index) { ORTE_ERROR_LOG(rc); return rc; } - rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - ORTE_PROC_MY_HNP, req, ORTE_RML_TAG_DAEMON, + rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, req, ORTE_RML_TAG_DAEMON, orte_rml_send_callback, NULL); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); @@ -724,8 +723,7 @@ int orte_submit_halt(void) ORTE_ERROR_LOG(rc); return rc; } - rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - ORTE_PROC_MY_HNP, req, + rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, req, ORTE_RML_TAG_DAEMON, orte_rml_send_callback, NULL); if (ORTE_SUCCESS != rc) { @@ -1146,8 +1144,7 @@ int orte_submit_job(char *argv[], int *index, ORTE_ERROR_LOG(rc); return rc; } - orte_rml.send_buffer_nb(orte_mgmt_conduit, - ORTE_PROC_MY_HNP, req, ORTE_RML_TAG_DAEMON, + orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, req, ORTE_RML_TAG_DAEMON, orte_rml_send_callback, NULL); /* Inform the caller of the tracker index if they passed a index pointer */ @@ -3378,8 +3375,7 @@ void orte_profile_wakeup(int sd, short args, void *cbdata) for (i=0; i < nreports; i++) { OBJ_RETAIN(buffer); name.vpid = i; - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - &name, buffer, + if (0 > (rc = orte_rml.send_buffer_nb(&name, buffer, 
ORTE_RML_TAG_DAEMON, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(rc); diff --git a/orte/orted/pmix/pmix_server.c b/orte/orted/pmix/pmix_server.c index ee4f7f4d45..b363442a0e 100644 --- a/orte/orted/pmix/pmix_server.c +++ b/orte/orted/pmix/pmix_server.c @@ -13,7 +13,7 @@ * All rights reserved. * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2014-2017 Mellanox Technologies, Inc. * All rights reserved. * Copyright (c) 2014-2015 Research Organization for Information Science @@ -391,8 +391,7 @@ static void send_error(int status, opal_process_name_t *idreq, } /* send the response */ - orte_rml.send_buffer_nb(orte_mgmt_conduit, - remote, reply, + orte_rml.send_buffer_nb(remote, reply, ORTE_RML_TAG_DIRECT_MODEX_RESP, orte_rml_send_callback, NULL); return; @@ -435,8 +434,7 @@ static void _mdxresp(int sd, short args, void *cbdata) opal_dss.copy_payload(reply, &req->msg); /* send the response */ - orte_rml.send_buffer_nb(orte_mgmt_conduit, - &req->proxy, reply, + orte_rml.send_buffer_nb(&req->proxy, reply, ORTE_RML_TAG_DIRECT_MODEX_RESP, orte_rml_send_callback, NULL); diff --git a/orte/orted/pmix/pmix_server_dyn.c b/orte/orted/pmix/pmix_server_dyn.c index 259d719132..e84178ee50 100644 --- a/orte/orted/pmix/pmix_server_dyn.c +++ b/orte/orted/pmix/pmix_server_dyn.c @@ -13,7 +13,7 @@ * All rights reserved. * Copyright (c) 2009-2017 Cisco Systems, Inc. All rights reserved * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2014 Mellanox Technologies, Inc. * All rights reserved. 
* Copyright (c) 2014-2016 Research Organization for Information Science @@ -143,8 +143,7 @@ static void spawn(int sd, short args, void *cbdata) } /* send it to the HNP for processing - might be myself! */ - if (ORTE_SUCCESS != (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - ORTE_PROC_MY_HNP, buf, + if (ORTE_SUCCESS != (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, buf, ORTE_RML_TAG_PLM, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(rc); diff --git a/orte/orted/pmix/pmix_server_fence.c b/orte/orted/pmix/pmix_server_fence.c index fe0f942cd1..20c1849bbb 100644 --- a/orte/orted/pmix/pmix_server_fence.c +++ b/orte/orted/pmix/pmix_server_fence.c @@ -13,7 +13,7 @@ * All rights reserved. * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2014 Mellanox Technologies, Inc. * All rights reserved. * Copyright (c) 2014-2017 Research Organization for Information Science @@ -261,8 +261,7 @@ static void dmodex_req(int sd, short args, void *cbdata) } /* send it to the host daemon */ - if (ORTE_SUCCESS != (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - &dmn->name, buf, ORTE_RML_TAG_DIRECT_MODEX, + if (ORTE_SUCCESS != (rc = orte_rml.send_buffer_nb(&dmn->name, buf, ORTE_RML_TAG_DIRECT_MODEX, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(rc); opal_hotel_checkout(&orte_pmix_server_globals.reqs, req->room_num); diff --git a/orte/orted/pmix/pmix_server_gen.c b/orte/orted/pmix/pmix_server_gen.c index 648d69557a..4d8aba4d54 100644 --- a/orte/orted/pmix/pmix_server_gen.c +++ b/orte/orted/pmix/pmix_server_gen.c @@ -13,7 +13,7 @@ * All rights reserved. * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. 
+ * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2014-2017 Mellanox Technologies, Inc. * All rights reserved. * Copyright (c) 2014 Research Organization for Information Science @@ -1080,8 +1080,7 @@ void pmix_server_log_fn(opal_process_name_t *requestor, buf = OBJ_NEW(opal_buffer_t); opal_dss.load(buf, val->data.bo.bytes, val->data.bo.size); val->data.bo.bytes = NULL; - if (ORTE_SUCCESS != (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - ORTE_PROC_MY_HNP, buf, + if (ORTE_SUCCESS != (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, buf, ORTE_RML_TAG_SHOW_HELP, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(rc); diff --git a/orte/orted/pmix/pmix_server_pub.c b/orte/orted/pmix/pmix_server_pub.c index db1b44e2a5..72c9a4c506 100644 --- a/orte/orted/pmix/pmix_server_pub.c +++ b/orte/orted/pmix/pmix_server_pub.c @@ -13,7 +13,7 @@ * All rights reserved. * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2014 Mellanox Technologies, Inc. * All rights reserved. 
* Copyright (c) 2014-2016 Research Organization for Information Science @@ -132,9 +132,9 @@ static int init_server(void) struct timeval timeout; timeout.tv_sec = orte_pmix_server_globals.timeout; timeout.tv_usec = 0; - if (ORTE_SUCCESS != (rc = orte_rml.ping(orte_mgmt_conduit, server, &timeout))) { + if (ORTE_SUCCESS != (rc = orte_rml.ping(server, &timeout))) { /* try it one more time */ - if (ORTE_SUCCESS != (rc = orte_rml.ping(orte_mgmt_conduit, server, &timeout))) { + if (ORTE_SUCCESS != (rc = orte_rml.ping(server, &timeout))) { /* okay give up */ orte_show_help("help-orterun.txt", "orterun:server-not-found", true, orte_basename, server, @@ -205,8 +205,7 @@ static void execute(int sd, short args, void *cbdata) } /* send the request to the target */ - rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - target, xfer, + rc = orte_rml.send_buffer_nb(target, xfer, ORTE_RML_TAG_DATA_SERVER, orte_rml_send_callback, NULL); if (ORTE_SUCCESS == rc) { diff --git a/orte/runtime/orte_data_server.c b/orte/runtime/orte_data_server.c index 903e17c66a..d4d3cb5fca 100644 --- a/orte/runtime/orte_data_server.c +++ b/orte/runtime/orte_data_server.c @@ -12,7 +12,7 @@ * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2016 Los Alamos National Security, LLC. * All rights reserved - * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2019 Intel, Inc. All rights reserved. * Copyright (c) 2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -336,8 +336,7 @@ void orte_data_server(int status, orte_process_name_t* sender, ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&req->requestor)); - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - &req->requestor, reply, ORTE_RML_TAG_DATA_CLIENT, + if (0 > (rc = orte_rml.send_buffer_nb(&req->requestor, reply, ORTE_RML_TAG_DATA_CLIENT, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(rc); OBJ_RELEASE(reply); @@ -716,8 +715,7 @@ void orte_data_server(int status, orte_process_name_t* sender, } SEND_ANSWER: - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - sender, answer, ORTE_RML_TAG_DATA_CLIENT, + if (0 > (rc = orte_rml.send_buffer_nb(sender, answer, ORTE_RML_TAG_DATA_CLIENT, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(rc); OBJ_RELEASE(answer); diff --git a/orte/runtime/orte_globals.c b/orte/runtime/orte_globals.c index 2413ee7e90..abd2bfb386 100644 --- a/orte/runtime/orte_globals.c +++ b/orte/runtime/orte_globals.c @@ -72,10 +72,6 @@ char *orte_basename = NULL; bool orte_coprocessors_detected = false; opal_hash_table_t *orte_coprocessors = NULL; char *orte_topo_signature = NULL; -char *orte_mgmt_transport = NULL; -char *orte_coll_transport = NULL; -int orte_mgmt_conduit = -1; -int orte_coll_conduit = -1; bool orte_no_vm = false; char *orte_data_server_uri = NULL; diff --git a/orte/runtime/orte_globals.h b/orte/runtime/orte_globals.h index 68c03842b1..9abe705a47 100644 --- a/orte/runtime/orte_globals.h +++ b/orte/runtime/orte_globals.h @@ -74,11 +74,6 @@ ORTE_DECLSPEC extern bool orte_event_base_active; /* instantiated in orte/runtim ORTE_DECLSPEC extern bool orte_proc_is_bound; /* instantiated in orte/runtime/orte_init.c */ ORTE_DECLSPEC extern int orte_progress_thread_debug; /* instantiated in orte/runtime/orte_init.c */ -ORTE_DECLSPEC extern char *orte_mgmt_transport; -ORTE_DECLSPEC extern char *orte_coll_transport; -ORTE_DECLSPEC extern int orte_mgmt_conduit; -ORTE_DECLSPEC extern int orte_coll_conduit; - /** * 
Global indicating where this process was bound to at launch (will * be NULL if !orte_proc_is_bound) diff --git a/orte/runtime/orte_mca_params.c b/orte/runtime/orte_mca_params.c index 0e4c10acd1..5b50ea0e8d 100644 --- a/orte/runtime/orte_mca_params.c +++ b/orte/runtime/orte_mca_params.c @@ -746,19 +746,6 @@ int orte_register_params(void) OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_READONLY, &orte_daemon_cores); - /* get the conduit params */ - orte_coll_transport = "fabric,ethernet"; - (void) mca_base_var_register("orte", "orte", "coll", "transports", - "Comma-separated list of transports to use for ORTE collectives", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, &orte_coll_transport); - - orte_mgmt_transport = "oob"; - (void) mca_base_var_register("orte", "orte", "mgmt", "transports", - "Comma-separated list of transports to use for ORTE management messages", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, &orte_mgmt_transport); - /* Amount of time to wait for a stack trace to return from the daemons */ orte_stack_trace_wait_timeout = 30; (void) mca_base_var_register ("orte", "orte", NULL, "timeout_for_stack_trace", diff --git a/orte/util/comm/comm.c b/orte/util/comm/comm.c index fdcbcc033e..6f2fba50aa 100644 --- a/orte/util/comm/comm.c +++ b/orte/util/comm/comm.c @@ -11,7 +11,7 @@ * All rights reserved. * Copyright (c) 2010-2012 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -136,7 +136,7 @@ int orte_util_comm_connect_tool(char *uri) OBJ_DESTRUCT(&val); /* set the route to be direct */ - if (ORTE_SUCCESS != (rc = orte_routed.update_route(NULL, &tool, &tool))) { + if (ORTE_SUCCESS != (rc = orte_routed.update_route(&tool, &tool))) { ORTE_ERROR_LOG(rc); return rc; } @@ -201,8 +201,7 @@ int orte_util_comm_report_event(orte_comm_event_t ev) opal_event_evtimer_add(quicktime, &tv); /* do the send */ - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - &tool, buf, ORTE_RML_TAG_TOOL, send_cbfunc, NULL))) { + if (0 > (rc = orte_rml.send_buffer_nb(&tool, buf, ORTE_RML_TAG_TOOL, send_cbfunc, NULL))) { ORTE_ERROR_LOG(rc); OBJ_RELEASE(buf); return rc; @@ -292,8 +291,7 @@ int orte_util_comm_query_job_info(const orte_process_name_t *hnp, orte_jobid_t j opal_event_evtimer_add(quicktime, &tv); /* do the send */ - if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit, - (orte_process_name_t*)hnp, cmd, + if (0 > (ret = orte_rml.send_buffer_nb((orte_process_name_t*)hnp, cmd, ORTE_RML_TAG_DAEMON, send_cbfunc, NULL))) { ORTE_ERROR_LOG(ret); OBJ_RELEASE(cmd); @@ -401,8 +399,7 @@ int orte_util_comm_query_node_info(const orte_process_name_t *hnp, char *node, opal_event_evtimer_add(quicktime, &tv); /* do the send */ - if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit, - (orte_process_name_t*)hnp, cmd, + if (0 > (ret = orte_rml.send_buffer_nb((orte_process_name_t*)hnp, cmd, ORTE_RML_TAG_DAEMON, send_cbfunc, NULL))) { ORTE_ERROR_LOG(ret); OBJ_RELEASE(cmd); @@ -519,8 +516,7 @@ int orte_util_comm_query_proc_info(const orte_process_name_t *hnp, orte_jobid_t opal_event_evtimer_add(quicktime, &tv); /* do the send */ - if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit, - (orte_process_name_t*)hnp, cmd, ORTE_RML_TAG_DAEMON, + if (0 > (ret = orte_rml.send_buffer_nb((orte_process_name_t*)hnp, cmd, ORTE_RML_TAG_DAEMON, send_cbfunc, NULL))) { ORTE_ERROR_LOG(ret); OBJ_RELEASE(cmd); @@ -669,8 +665,7 
@@ int orte_util_comm_spawn_job(const orte_process_name_t *hnp, orte_job_t *jdata) ORTE_NAME_PRINT(hnp))); /* tell the target HNP to launch the job */ - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - (orte_process_name_t*)hnp, buf, + if (0 > (rc = orte_rml.send_buffer_nb((orte_process_name_t*)hnp, buf, ORTE_RML_TAG_DAEMON, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(rc); @@ -751,8 +746,7 @@ int orte_util_comm_terminate_job(const orte_process_name_t *hnp, orte_jobid_t jo ORTE_NAME_PRINT(hnp))); /* tell the target HNP to terminate the job */ - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - (orte_process_name_t*)hnp, buf, + if (0 > (rc = orte_rml.send_buffer_nb((orte_process_name_t*)hnp, buf, ORTE_RML_TAG_DAEMON, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(rc); @@ -809,8 +803,7 @@ int orte_util_comm_halt_vm(const orte_process_name_t *hnp) } /* send the order */ - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - (orte_process_name_t*)hnp, buf, + if (0 > (rc = orte_rml.send_buffer_nb((orte_process_name_t*)hnp, buf, ORTE_RML_TAG_DAEMON, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(rc); diff --git a/orte/util/hnp_contact.c b/orte/util/hnp_contact.c index f7cf36f837..134dd6a2c8 100644 --- a/orte/util/hnp_contact.c +++ b/orte/util/hnp_contact.c @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2019 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -161,7 +161,7 @@ int orte_read_hnp_contact_file(char *filename, orte_hnp_contact_t *hnp, bool con OBJ_DESTRUCT(&val); /* set the route to be direct */ - if (ORTE_SUCCESS != (rc = orte_routed.update_route(NULL, &hnp->name, &hnp->name))) { + if (ORTE_SUCCESS != (rc = orte_routed.update_route(&hnp->name, &hnp->name))) { ORTE_ERROR_LOG(rc); free(hnp_uri); return rc; diff --git a/orte/util/nidmap.c b/orte/util/nidmap.c index 39feb9677b..744718fa21 100644 --- a/orte/util/nidmap.c +++ b/orte/util/nidmap.c @@ -399,29 +399,30 @@ int orte_util_decode_nidmap(opal_buffer_t *buf) } else { vpid = UINT32_MAX; } - if (UINT32_MAX != vpid && - NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(daemons->procs, vpid))) { - proc = OBJ_NEW(orte_proc_t); - proc->name.jobid = ORTE_PROC_MY_NAME->jobid; - proc->name.vpid = vpid; - proc->state = ORTE_PROC_STATE_RUNNING; - ORTE_FLAG_SET(proc, ORTE_PROC_FLAG_ALIVE); - daemons->num_procs++; - opal_pointer_array_set_item(daemons->procs, proc->name.vpid, proc); + if (UINT32_MAX != vpid) { + if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(daemons->procs, vpid))) { + proc = OBJ_NEW(orte_proc_t); + proc->name.jobid = ORTE_PROC_MY_NAME->jobid; + proc->name.vpid = vpid; + proc->state = ORTE_PROC_STATE_RUNNING; + ORTE_FLAG_SET(proc, ORTE_PROC_FLAG_ALIVE); + daemons->num_procs++; + opal_pointer_array_set_item(daemons->procs, proc->name.vpid, proc); + } + nd->index = proc->name.vpid; + OBJ_RETAIN(nd); + proc->node = nd; + OBJ_RETAIN(proc); + nd->daemon = proc; } - nd->index = proc->name.vpid; - OBJ_RETAIN(nd); - proc->node = nd; - OBJ_RETAIN(proc); - nd->daemon = proc; } /* update num procs */ if (orte_process_info.num_procs != daemons->num_procs) { orte_process_info.num_procs = daemons->num_procs; - /* need to update the routing plan */ - orte_routed.update_routing_plan(NULL); } + /* need to update the routing plan */ + orte_routed.update_routing_plan(); if 
(orte_process_info.max_procs < orte_process_info.num_procs) { orte_process_info.max_procs = orte_process_info.num_procs; diff --git a/orte/util/show_help.c b/orte/util/show_help.c index 8ea1519d6b..f8078465c6 100644 --- a/orte/util/show_help.c +++ b/orte/util/show_help.c @@ -12,7 +12,7 @@ * Copyright (c) 2008-2018 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2016-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2019 Intel, Inc. All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * @@ -694,8 +694,7 @@ int orte_show_help_norender(const char *filename, const char *topic, /* if we are a daemon, then send it via RML to the HNP */ if (ORTE_PROC_IS_DAEMON) { /* send it to the HNP */ - if (ORTE_SUCCESS != (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - ORTE_PROC_MY_HNP, buf, + if (ORTE_SUCCESS != (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, buf, ORTE_RML_TAG_SHOW_HELP, orte_rml_send_callback, NULL))) { OBJ_RELEASE(buf); @@ -787,8 +786,7 @@ int orte_show_help_suppress(const char *filename, const char *topic) /* pack the flag that we DO NOT have a string */ opal_dss.pack(buf, &have_output, 1, OPAL_INT8); /* send it to the HNP */ - if (ORTE_SUCCESS != (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - ORTE_PROC_MY_HNP, buf, + if (ORTE_SUCCESS != (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, buf, ORTE_RML_TAG_SHOW_HELP, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(rc); From fc0b0938a70a2e39bc77056dd32de774c380750d Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Fri, 8 Feb 2019 11:25:35 -0800 Subject: [PATCH 8/8] Cache the old orte/regx components In case someone wants to restore and use them - leave them as opal_ignore'd for now Signed-off-by: Ralph Castain --- opal/mca/compress/fwd/.opal_ignore | 0 opal/mca/compress/fwd/Makefile.am | 36 ++ opal/mca/compress/fwd/owner.txt | 7 + opal/mca/compress/fwd/regx_fwd.c | 
300 ++++++++++++++++ opal/mca/compress/fwd/regx_fwd.h | 28 ++ opal/mca/compress/fwd/regx_fwd_component.c | 44 +++ opal/mca/compress/reverse/.opal_ignore | 0 opal/mca/compress/reverse/Makefile.am | 36 ++ opal/mca/compress/reverse/owner.txt | 7 + opal/mca/compress/reverse/regx_reverse.c | 319 ++++++++++++++++++ opal/mca/compress/reverse/regx_reverse.h | 28 ++ .../compress/reverse/regx_reverse_component.c | 44 +++ 12 files changed, 849 insertions(+) create mode 100644 opal/mca/compress/fwd/.opal_ignore create mode 100644 opal/mca/compress/fwd/Makefile.am create mode 100644 opal/mca/compress/fwd/owner.txt create mode 100644 opal/mca/compress/fwd/regx_fwd.c create mode 100644 opal/mca/compress/fwd/regx_fwd.h create mode 100644 opal/mca/compress/fwd/regx_fwd_component.c create mode 100644 opal/mca/compress/reverse/.opal_ignore create mode 100644 opal/mca/compress/reverse/Makefile.am create mode 100644 opal/mca/compress/reverse/owner.txt create mode 100644 opal/mca/compress/reverse/regx_reverse.c create mode 100644 opal/mca/compress/reverse/regx_reverse.h create mode 100644 opal/mca/compress/reverse/regx_reverse_component.c diff --git a/opal/mca/compress/fwd/.opal_ignore b/opal/mca/compress/fwd/.opal_ignore new file mode 100644 index 0000000000..e69de29bb2 diff --git a/opal/mca/compress/fwd/Makefile.am b/opal/mca/compress/fwd/Makefile.am new file mode 100644 index 0000000000..6788cfede3 --- /dev/null +++ b/opal/mca/compress/fwd/Makefile.am @@ -0,0 +1,36 @@ +# +# Copyright (c) 2016-2019 Intel, Inc. All rights reserved. +# Copyright (c) 2017 IBM Corporation. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +sources = \ + regx_fwd_component.c \ + regx_fwd.h \ + regx_fwd.c + +# Make the output library in this directory, and name it either +# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la +# (for static builds).
+ +if MCA_BUILD_orte_regx_fwd_DSO +component_noinst = +component_install = mca_regx_fwd.la +else +component_noinst = libmca_regx_fwd.la +component_install = +endif + +mcacomponentdir = $(ortelibdir) +mcacomponent_LTLIBRARIES = $(component_install) +mca_regx_fwd_la_SOURCES = $(sources) +mca_regx_fwd_la_LDFLAGS = -module -avoid-version +mca_regx_fwd_la_LIBADD = $(top_builddir)/orte/lib@ORTE_LIB_PREFIX@open-rte.la + +noinst_LTLIBRARIES = $(component_noinst) +libmca_regx_fwd_la_SOURCES = $(sources) +libmca_regx_fwd_la_LDFLAGS = -module -avoid-version diff --git a/opal/mca/compress/fwd/owner.txt b/opal/mca/compress/fwd/owner.txt new file mode 100644 index 0000000000..85b4416d20 --- /dev/null +++ b/opal/mca/compress/fwd/owner.txt @@ -0,0 +1,7 @@ +# +# owner/status file +# owner: institution that is responsible for this package +# status: e.g. active, maintenance, unmaintained +# +owner: INTEL +status: active diff --git a/opal/mca/compress/fwd/regx_fwd.c b/opal/mca/compress/fwd/regx_fwd.c new file mode 100644 index 0000000000..278de93214 --- /dev/null +++ b/opal/mca/compress/fwd/regx_fwd.c @@ -0,0 +1,300 @@ +/* + * Copyright (c) 2016-2019 Intel, Inc. All rights reserved. + * Copyright (c) 2018 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + */ + +#include "orte_config.h" +#include "orte/types.h" +#include "opal/types.h" + +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif +#include <ctype.h> + +#include "opal/util/argv.h" +#include "opal/util/basename.h" +#include "opal/util/opal_environ.h" + +#include "orte/runtime/orte_globals.h" +#include "orte/util/name_fns.h" +#include "orte/util/show_help.h" +#include "orte/mca/errmgr/errmgr.h" +#include "orte/mca/rmaps/base/base.h" +#include "orte/mca/routed/routed.h" +#include "orte/mca/regx/base/base.h" + +#include "regx_fwd.h" + +static int nidmap_create(opal_pointer_array_t *pool, char **regex); + +orte_regx_base_module_t orte_regx_fwd_module = { + .nidmap_create = nidmap_create, + .nidmap_parse = orte_regx_base_nidmap_parse, + .extract_node_names = orte_regx_base_extract_node_names, + .encode_nodemap = orte_regx_base_encode_nodemap, + .decode_daemon_nodemap = orte_regx_base_decode_daemon_nodemap, + .generate_ppn = orte_regx_base_generate_ppn, + .parse_ppn = orte_regx_base_parse_ppn +}; + +static int nidmap_create(opal_pointer_array_t *pool, char **regex) +{ + char *node; + char prefix[ORTE_MAX_NODE_PREFIX]; + int i, j, n, len, startnum, nodenum, numdigits; + bool found; + char *suffix, *sfx, *nodenames; + orte_regex_node_t *ndreg; + orte_regex_range_t *range, *rng; + opal_list_t nodenms, dvpids; + opal_list_item_t *item, *itm2; + char **regexargs = NULL, *tmp, *tmp2; + orte_node_t *nptr; + orte_vpid_t vpid; + + OBJ_CONSTRUCT(&nodenms, opal_list_t); + OBJ_CONSTRUCT(&dvpids, opal_list_t); + + rng = NULL; + for (n=0; n < pool->size; n++) { + if (NULL == (nptr = (orte_node_t*)opal_pointer_array_get_item(pool, n))) { + continue; + } + /* if no daemon has been assigned, then this node is not being used */ + if (NULL == nptr->daemon) { + vpid = -1; // indicates no daemon assigned + } else { + vpid = nptr->daemon->name.vpid; + } + /* deal with the daemon vpid - see if it is next in the + * current range */
+ if (NULL == rng) { + /* just starting */ + rng = OBJ_NEW(orte_regex_range_t); + rng->vpid = vpid; + rng->cnt = 1; + opal_list_append(&dvpids, &rng->super); + } else if (UINT32_MAX == vpid) { + if (-1 == rng->vpid) { + rng->cnt++; + } else { + /* need to start another range */ + rng = OBJ_NEW(orte_regex_range_t); + rng->vpid = vpid; + rng->cnt = 1; + opal_list_append(&dvpids, &rng->super); + } + } else if (-1 == rng->vpid) { + /* need to start another range */ + rng = OBJ_NEW(orte_regex_range_t); + rng->vpid = vpid; + rng->cnt = 1; + opal_list_append(&dvpids, &rng->super); + } else { + /* is this the next in line */ + if (vpid == (orte_vpid_t)(rng->vpid + rng->cnt)) { + rng->cnt++; + } else { + /* need to start another range */ + rng = OBJ_NEW(orte_regex_range_t); + rng->vpid = vpid; + rng->cnt = 1; + opal_list_append(&dvpids, &rng->super); + } + } + node = nptr->name; + /* determine this node's prefix by looking for first digit char */ + len = strlen(node); + startnum = -1; + memset(prefix, 0, ORTE_MAX_NODE_PREFIX); + for (i=0, j=0; i < len; i++) { + /* valid hostname characters are ascii letters, digits and the '-' character. */ + if (isdigit(node[i])) { + /* count the size of the numeric field - but don't + * add the digits to the prefix + */ + if (startnum < 0) { + /* okay, this defines end of the prefix */ + startnum = i; + } + continue; + } + /* this must be either an alpha, a '.', or '-' */ + if (!isalpha(node[i]) && '-' != node[i] && '.' 
!= node[i]) { + orte_show_help("help-regex.txt", "regex:invalid-name", true, node); + return ORTE_ERR_SILENT; + } + if (startnum < 0) { + prefix[j++] = node[i]; + } + } + if (startnum < 0) { + /* can't compress this name - just add it to the list */ + ndreg = OBJ_NEW(orte_regex_node_t); + ndreg->prefix = strdup(node); + opal_list_append(&nodenms, &ndreg->super); + continue; + } + /* convert the digits and get any suffix */ + nodenum = strtol(&node[startnum], &sfx, 10); + if (NULL != sfx) { + suffix = strdup(sfx); + numdigits = (int)(sfx - &node[startnum]); + } else { + suffix = NULL; + numdigits = (int)strlen(&node[startnum]); + } + /* is this node name already on our list? */ + found = false; + if (0 != opal_list_get_size(&nodenms)) { + ndreg = (orte_regex_node_t*)opal_list_get_last(&nodenms); + + if ((0 < strlen(prefix) && NULL == ndreg->prefix) || + (0 == strlen(prefix) && NULL != ndreg->prefix) || + (0 < strlen(prefix) && NULL != ndreg->prefix && + 0 != strcmp(prefix, ndreg->prefix)) || + (NULL == suffix && NULL != ndreg->suffix) || + (NULL != suffix && NULL == ndreg->suffix) || + (NULL != suffix && NULL != ndreg->suffix && + 0 != strcmp(suffix, ndreg->suffix)) || + (numdigits != ndreg->num_digits)) { + found = false; + } else { + /* found a match - flag it */ + found = true; + } + } + if (found) { + range = (orte_regex_range_t*)opal_list_get_last(&ndreg->ranges); + if (NULL == range) { + /* first range for this nodeid */ + range = OBJ_NEW(orte_regex_range_t); + range->vpid = nodenum; + range->cnt = 1; + opal_list_append(&ndreg->ranges, &range->super); + /* see if the node number is out of sequence */ + } else if (nodenum != (range->vpid + range->cnt)) { + /* start a new range */ + range = OBJ_NEW(orte_regex_range_t); + range->vpid = nodenum; + range->cnt = 1; + opal_list_append(&ndreg->ranges, &range->super); + } else { + /* everything matches - just increment the cnt */ + range->cnt++; + } + } else { + /* need to add it */ + ndreg = 
OBJ_NEW(orte_regex_node_t); + if (0 < strlen(prefix)) { + ndreg->prefix = strdup(prefix); + } + if (NULL != suffix) { + ndreg->suffix = strdup(suffix); + } + ndreg->num_digits = numdigits; + opal_list_append(&nodenms, &ndreg->super); + /* record the first range for this nodeid - we took + * care of names we can't compress above + */ + range = OBJ_NEW(orte_regex_range_t); + range->vpid = nodenum; + range->cnt = 1; + opal_list_append(&ndreg->ranges, &range->super); + } + if (NULL != suffix) { + free(suffix); + } + } + /* begin constructing the regular expression */ + while (NULL != (item = opal_list_remove_first(&nodenms))) { + ndreg = (orte_regex_node_t*)item; + + /* if no ranges, then just add the name */ + if (0 == opal_list_get_size(&ndreg->ranges)) { + if (NULL != ndreg->prefix) { + /* solitary node */ + opal_asprintf(&tmp, "%s", ndreg->prefix); + opal_argv_append_nosize(&regexargs, tmp); + free(tmp); + } + OBJ_RELEASE(ndreg); + continue; + } + /* start the regex for this nodeid with the prefix */ + if (NULL != ndreg->prefix) { + opal_asprintf(&tmp, "%s[%d:", ndreg->prefix, ndreg->num_digits); + } else { + opal_asprintf(&tmp, "[%d:", ndreg->num_digits); + } + /* add the ranges */ + while (NULL != (itm2 = opal_list_remove_first(&ndreg->ranges))) { + range = (orte_regex_range_t*)itm2; + if (1 == range->cnt) { + opal_asprintf(&tmp2, "%s%u,", tmp, range->vpid); + } else { + opal_asprintf(&tmp2, "%s%u-%u,", tmp, range->vpid, range->vpid + range->cnt - 1); + } + free(tmp); + tmp = tmp2; + OBJ_RELEASE(range); + } + /* replace the final comma */ + tmp[strlen(tmp)-1] = ']'; + if (NULL != ndreg->suffix) { + /* add in the suffix, if provided */ + opal_asprintf(&tmp2, "%s%s", tmp, ndreg->suffix); + free(tmp); + tmp = tmp2; + } + opal_argv_append_nosize(&regexargs, tmp); + free(tmp); + OBJ_RELEASE(ndreg); + } + + /* assemble final result */ + nodenames = opal_argv_join(regexargs, ','); + /* cleanup */ + opal_argv_free(regexargs); + OBJ_DESTRUCT(&nodenms); + + /* do the same for
the vpids */ + tmp = NULL; + while (NULL != (item = opal_list_remove_first(&dvpids))) { + rng = (orte_regex_range_t*)item; + if (1 < rng->cnt) { + if (NULL == tmp) { + opal_asprintf(&tmp, "%u(%u)", rng->vpid, rng->cnt); + } else { + opal_asprintf(&tmp2, "%s,%u(%u)", tmp, rng->vpid, rng->cnt); + free(tmp); + tmp = tmp2; + } + } else { + if (NULL == tmp) { + opal_asprintf(&tmp, "%u", rng->vpid); + } else { + opal_asprintf(&tmp2, "%s,%u", tmp, rng->vpid); + free(tmp); + tmp = tmp2; + } + } + OBJ_RELEASE(rng); + } + OPAL_LIST_DESTRUCT(&dvpids); + + /* now concatenate the results into one string */ + opal_asprintf(&tmp2, "%s@%s", nodenames, tmp); + free(nodenames); + free(tmp); + *regex = tmp2; + return ORTE_SUCCESS; +} diff --git a/opal/mca/compress/fwd/regx_fwd.h b/opal/mca/compress/fwd/regx_fwd.h new file mode 100644 index 0000000000..fbaf78ac82 --- /dev/null +++ b/opal/mca/compress/fwd/regx_fwd.h @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2016-2019 Intel, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef _MCA_REGX_FwD_H_ +#define _MCA_REGX_FwD_H_ + +#include "orte_config.h" + +#include "orte/types.h" + +#include "opal/mca/base/base.h" +#include "orte/mca/regx/regx.h" + + +BEGIN_C_DECLS + +ORTE_MODULE_DECLSPEC extern orte_regx_base_component_t mca_regx_fwd_component; +extern orte_regx_base_module_t orte_regx_fwd_module; + +END_C_DECLS + +#endif /* MCA_REGX_FwD_H_ */ diff --git a/opal/mca/compress/fwd/regx_fwd_component.c b/opal/mca/compress/fwd/regx_fwd_component.c new file mode 100644 index 0000000000..0a9e4a9dce --- /dev/null +++ b/opal/mca/compress/fwd/regx_fwd_component.c @@ -0,0 +1,44 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2016-2019 Intel, Inc. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "orte_config.h" +#include "orte/types.h" +#include "opal/types.h" + +#include "opal/util/show_help.h" + +#include "orte/mca/regx/regx.h" +#include "regx_fwd.h" + +static int component_query(mca_base_module_t **module, int *priority); + +/* + * Struct of function pointers and all that to let us be initialized + */ +orte_regx_base_component_t mca_regx_fwd_component = { + .base_version = { + MCA_REGX_BASE_VERSION_1_0_0, + .mca_component_name = "fwd", + MCA_BASE_MAKE_VERSION(component, ORTE_MAJOR_VERSION, ORTE_MINOR_VERSION, + ORTE_RELEASE_VERSION), + .mca_query_component = component_query, + }, + .base_data = { + /* The component is checkpoint ready */ + MCA_BASE_METADATA_PARAM_CHECKPOINT + }, +}; + +static int component_query(mca_base_module_t **module, int *priority) +{ + *module = (mca_base_module_t*)&orte_regx_fwd_module; + *priority = 10; + return ORTE_SUCCESS; +} diff --git a/opal/mca/compress/reverse/.opal_ignore b/opal/mca/compress/reverse/.opal_ignore new file mode 100644 index 0000000000..e69de29bb2 diff --git a/opal/mca/compress/reverse/Makefile.am b/opal/mca/compress/reverse/Makefile.am new file mode 100644 index 0000000000..38a5ca41ec --- /dev/null +++ b/opal/mca/compress/reverse/Makefile.am @@ -0,0 +1,36 @@ +# +# Copyright (c) 2016-2019 Intel, Inc. All rights reserved. +# Copyright (c) 2017 IBM Corporation. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +sources = \ + regx_reverse_component.c \ + regx_reverse.h \ + regx_reverse.c + +# Make the output library in this directory, and name it either +# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la +# (for static builds).
+ +if MCA_BUILD_orte_regx_reverse_DSO +component_noinst = +component_install = mca_regx_reverse.la +else +component_noinst = libmca_regx_reverse.la +component_install = +endif + +mcacomponentdir = $(ortelibdir) +mcacomponent_LTLIBRARIES = $(component_install) +mca_regx_reverse_la_SOURCES = $(sources) +mca_regx_reverse_la_LDFLAGS = -module -avoid-version +mca_regx_reverse_la_LIBADD = $(top_builddir)/orte/lib@ORTE_LIB_PREFIX@open-rte.la + +noinst_LTLIBRARIES = $(component_noinst) +libmca_regx_reverse_la_SOURCES = $(sources) +libmca_regx_reverse_la_LDFLAGS = -module -avoid-version diff --git a/opal/mca/compress/reverse/owner.txt b/opal/mca/compress/reverse/owner.txt new file mode 100644 index 0000000000..2fd247dddb --- /dev/null +++ b/opal/mca/compress/reverse/owner.txt @@ -0,0 +1,7 @@ +# +# owner/status file +# owner: institution that is responsible for this package +# status: e.g. active, maintenance, unmaintained +# +owner: IBM +status: active diff --git a/opal/mca/compress/reverse/regx_reverse.c b/opal/mca/compress/reverse/regx_reverse.c new file mode 100644 index 0000000000..c6f03b07e9 --- /dev/null +++ b/opal/mca/compress/reverse/regx_reverse.c @@ -0,0 +1,319 @@ +/* + * Copyright (c) 2016-2019 Intel, Inc. All rights reserved. + * Copyright (c) 2018 IBM Corporation. All rights reserved. + * Copyright (c) 2018 Research Organization for Information Science + * and Technology (RIST). All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + */ + +#include "orte_config.h" +#include "orte/types.h" +#include "opal/types.h" + +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif +#include <ctype.h> + +#include "opal/util/argv.h" +#include "opal/util/basename.h" +#include "opal/util/opal_environ.h" + +#include "orte/runtime/orte_globals.h" +#include "orte/util/name_fns.h" +#include "orte/util/show_help.h" +#include "orte/mca/errmgr/errmgr.h" +#include "orte/mca/rmaps/base/base.h" +#include "orte/mca/routed/routed.h" +#include "orte/mca/regx/base/base.h" + +#include "regx_reverse.h" + +static int nidmap_create(opal_pointer_array_t *pool, char **regex); + +orte_regx_base_module_t orte_regx_reverse_module = { + .nidmap_create = nidmap_create, + .nidmap_parse = orte_regx_base_nidmap_parse, + .extract_node_names = orte_regx_base_extract_node_names, + .encode_nodemap = orte_regx_base_encode_nodemap, + .decode_daemon_nodemap = orte_regx_base_decode_daemon_nodemap, + .generate_ppn = orte_regx_base_generate_ppn, + .parse_ppn = orte_regx_base_parse_ppn +}; + +static int nidmap_create(opal_pointer_array_t *pool, char **regex) +{ + char *node; + char prefix[ORTE_MAX_NODE_PREFIX]; + int i, j, n, len, startnum, nodenum, numdigits; + bool found; + char *suffix, *sfx, *nodenames; + orte_regex_node_t *ndreg; + orte_regex_range_t *range, *rng; + opal_list_t nodenms, dvpids; + opal_list_item_t *item, *itm2; + char **regexargs = NULL, *tmp, *tmp2; + orte_node_t *nptr; + orte_vpid_t vpid; + + OBJ_CONSTRUCT(&nodenms, opal_list_t); + OBJ_CONSTRUCT(&dvpids, opal_list_t); + + rng = NULL; + for (n=0; n < pool->size; n++) { + if (NULL == (nptr = (orte_node_t*)opal_pointer_array_get_item(pool, n))) { + continue; + } + /* if no daemon has been assigned, then this node is not being used */ + if (NULL == nptr->daemon) { + vpid = -1; // indicates no daemon assigned + } else { + vpid = nptr->daemon->name.vpid; + } + /* deal with the daemon vpid - see if it is next in the + * current
range */ + if (NULL == rng) { + /* just starting */ + rng = OBJ_NEW(orte_regex_range_t); + rng->vpid = vpid; + rng->cnt = 1; + opal_list_append(&dvpids, &rng->super); + } else if (UINT32_MAX == vpid) { + if (-1 == rng->vpid) { + rng->cnt++; + } else { + /* need to start another range */ + rng = OBJ_NEW(orte_regex_range_t); + rng->vpid = vpid; + rng->cnt = 1; + opal_list_append(&dvpids, &rng->super); + } + } else if (-1 == rng->vpid) { + /* need to start another range */ + rng = OBJ_NEW(orte_regex_range_t); + rng->vpid = vpid; + rng->cnt = 1; + opal_list_append(&dvpids, &rng->super); + } else { + /* is this the next in line */ + if (vpid == (orte_vpid_t)(rng->vpid + rng->cnt)) { + rng->cnt++; + } else { + /* need to start another range */ + rng = OBJ_NEW(orte_regex_range_t); + rng->vpid = vpid; + rng->cnt = 1; + opal_list_append(&dvpids, &rng->super); + } + } + node = nptr->name; + opal_output_verbose(5, orte_regx_base_framework.framework_output, + "%s PROCESS NODE <%s>", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + node); + /* determine this node's prefix by looking for first digit char */ + len = strlen(node); + startnum = -1; + memset(prefix, 0, ORTE_MAX_NODE_PREFIX); + numdigits = 0; + + /* Valid hostname characters are: + * - ascii letters, digits, and the '-' character. + * Determine the prefix in reverse to better support hostnames like: + * c712f6n01, c699c086 where there are sets of digits, and the lowest + * set changes most frequently. + */ + startnum = -1; + memset(prefix, 0, ORTE_MAX_NODE_PREFIX); + numdigits = 0; + for (i=len-1; i >= 0; i--) { + // Count all of the digits + if( isdigit(node[i]) ) { + numdigits++; + continue; + } + else { + // At this point everything at and above position 'i' is prefix. 
+ for( j = 0; j <= i; ++j) { + prefix[j] = node[j]; + } + if (numdigits) { + startnum = j; + } + break; + } + } + + opal_output_verbose(5, orte_regx_base_framework.framework_output, + "%s PROCESS NODE <%s> : reverse / prefix \"%s\" / numdigits %d", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + node, prefix, numdigits); + + if (startnum < 0) { + /* can't compress this name - just add it to the list */ + ndreg = OBJ_NEW(orte_regex_node_t); + ndreg->prefix = strdup(node); + opal_list_append(&nodenms, &ndreg->super); + continue; + } + /* convert the digits and get any suffix */ + nodenum = strtol(&node[startnum], &sfx, 10); + if (NULL != sfx) { + suffix = strdup(sfx); + } else { + suffix = NULL; + } + /* is this node name already on our list? */ + found = false; + if (0 != opal_list_get_size(&nodenms)) { + ndreg = (orte_regex_node_t*)opal_list_get_last(&nodenms); + + if ((0 < strlen(prefix) && NULL == ndreg->prefix) || + (0 == strlen(prefix) && NULL != ndreg->prefix) || + (0 < strlen(prefix) && NULL != ndreg->prefix && + 0 != strcmp(prefix, ndreg->prefix)) || + (NULL == suffix && NULL != ndreg->suffix) || + (NULL != suffix && NULL == ndreg->suffix) || + (NULL != suffix && NULL != ndreg->suffix && + 0 != strcmp(suffix, ndreg->suffix)) || + (numdigits != ndreg->num_digits)) { + found = false; + } else { + /* found a match - flag it */ + found = true; + } + } + if (found) { + /* get the last range on this nodeid - we do this + * to preserve order + */ + range = (orte_regex_range_t*)opal_list_get_last(&ndreg->ranges); + if (NULL == range) { + /* first range for this nodeid */ + range = OBJ_NEW(orte_regex_range_t); + range->vpid = nodenum; + range->cnt = 1; + opal_list_append(&ndreg->ranges, &range->super); + /* see if the node number is out of sequence */ + } else if (nodenum != (range->vpid + range->cnt)) { + /* start a new range */ + range = OBJ_NEW(orte_regex_range_t); + range->vpid = nodenum; + range->cnt = 1; + opal_list_append(&ndreg->ranges, &range->super); + } else { + 
/* everything matches - just increment the cnt */ + range->cnt++; + } + } else { + /* need to add it */ + ndreg = OBJ_NEW(orte_regex_node_t); + if (0 < strlen(prefix)) { + ndreg->prefix = strdup(prefix); + } + if (NULL != suffix) { + ndreg->suffix = strdup(suffix); + } + ndreg->num_digits = numdigits; + opal_list_append(&nodenms, &ndreg->super); + /* record the first range for this nodeid - we took + * care of names we can't compress above + */ + range = OBJ_NEW(orte_regex_range_t); + range->vpid = nodenum; + range->cnt = 1; + opal_list_append(&ndreg->ranges, &range->super); + } + if (NULL != suffix) { + free(suffix); + } + } + /* begin constructing the regular expression */ + while (NULL != (item = opal_list_remove_first(&nodenms))) { + ndreg = (orte_regex_node_t*)item; + + /* if no ranges, then just add the name */ + if (0 == opal_list_get_size(&ndreg->ranges)) { + if (NULL != ndreg->prefix) { + /* solitary node */ + opal_asprintf(&tmp, "%s", ndreg->prefix); + opal_argv_append_nosize(&regexargs, tmp); + free(tmp); + } + OBJ_RELEASE(ndreg); + continue; + } + /* start the regex for this nodeid with the prefix */ + if (NULL != ndreg->prefix) { + opal_asprintf(&tmp, "%s[%d:", ndreg->prefix, ndreg->num_digits); + } else { + opal_asprintf(&tmp, "[%d:", ndreg->num_digits); + } + /* add the ranges */ + while (NULL != (itm2 = opal_list_remove_first(&ndreg->ranges))) { + range = (orte_regex_range_t*)itm2; + if (1 == range->cnt) { + opal_asprintf(&tmp2, "%s%u,", tmp, range->vpid); + } else { + opal_asprintf(&tmp2, "%s%u-%u,", tmp, range->vpid, range->vpid + range->cnt - 1); + } + free(tmp); + tmp = tmp2; + OBJ_RELEASE(range); + } + /* replace the final comma */ + tmp[strlen(tmp)-1] = ']'; + if (NULL != ndreg->suffix) { + /* add in the suffix, if provided */ + opal_asprintf(&tmp2, "%s%s", tmp, ndreg->suffix); + free(tmp); + tmp = tmp2; + } + opal_argv_append_nosize(&regexargs, tmp); + free(tmp); + OBJ_RELEASE(ndreg); + } + + /* assemble final result */ + nodenames =
opal_argv_join(regexargs, ','); + /* cleanup */ + opal_argv_free(regexargs); + OBJ_DESTRUCT(&nodenms); + + /* do the same for the vpids */ + tmp = NULL; + while (NULL != (item = opal_list_remove_first(&dvpids))) { + rng = (orte_regex_range_t*)item; + if (1 < rng->cnt) { + if (NULL == tmp) { + opal_asprintf(&tmp, "%u(%u)", rng->vpid, rng->cnt); + } else { + opal_asprintf(&tmp2, "%s,%u(%u)", tmp, rng->vpid, rng->cnt); + free(tmp); + tmp = tmp2; + } + } else { + if (NULL == tmp) { + opal_asprintf(&tmp, "%u", rng->vpid); + } else { + opal_asprintf(&tmp2, "%s,%u", tmp, rng->vpid); + free(tmp); + tmp = tmp2; + } + } + OBJ_RELEASE(rng); + } + OPAL_LIST_DESTRUCT(&dvpids); + + /* now concatenate the results into one string */ + opal_asprintf(&tmp2, "%s@%s", nodenames, tmp); + free(nodenames); + free(tmp); + *regex = tmp2; + return ORTE_SUCCESS; +} diff --git a/opal/mca/compress/reverse/regx_reverse.h b/opal/mca/compress/reverse/regx_reverse.h new file mode 100644 index 0000000000..d568ddfe60 --- /dev/null +++ b/opal/mca/compress/reverse/regx_reverse.h @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2016-2019 Intel, Inc. All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef _MCA_REGX_REVERSE_H_ +#define _MCA_REGX_REVERSE_H_ + +#include "orte_config.h" + +#include "orte/types.h" + +#include "opal/mca/base/base.h" +#include "orte/mca/regx/regx.h" + + +BEGIN_C_DECLS + +ORTE_MODULE_DECLSPEC extern orte_regx_base_component_t mca_regx_reverse_component; +extern orte_regx_base_module_t orte_regx_reverse_module; + +END_C_DECLS + +#endif /* MCA_REGX_ORTE_H_ */ diff --git a/opal/mca/compress/reverse/regx_reverse_component.c b/opal/mca/compress/reverse/regx_reverse_component.c new file mode 100644 index 0000000000..27a7b044cd --- /dev/null +++ b/opal/mca/compress/reverse/regx_reverse_component.c @@ -0,0 +1,44 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2016-2019 Intel, Inc. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "orte_config.h" +#include "orte/types.h" +#include "opal/types.h" + +#include "opal/util/show_help.h" + +#include "orte/mca/regx/regx.h" +#include "regx_reverse.h" + +static int component_query(mca_base_module_t **module, int *priority); + +/* + * Struct of function pointers and all that to let us be initialized + */ +orte_regx_base_component_t mca_regx_reverse_component = { + .base_version = { + MCA_REGX_BASE_VERSION_1_0_0, + .mca_component_name = "reverse", + MCA_BASE_MAKE_VERSION(component, ORTE_MAJOR_VERSION, ORTE_MINOR_VERSION, + ORTE_RELEASE_VERSION), + .mca_query_component = component_query, + }, + .base_data = { + /* The component is checkpoint ready */ + MCA_BASE_METADATA_PARAM_CHECKPOINT + }, +}; + +static int component_query(mca_base_module_t **module, int *priority) +{ + *module = (mca_base_module_t*)&orte_regx_reverse_module; + *priority = 1; + return ORTE_SUCCESS; +}