From 37dfdb76eb12027e9b9910956e6d3ad90ded7ff1 Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Thu, 14 Sep 2006 21:29:51 +0000 Subject: [PATCH] Here is the major MAD-cure commit. I have written plenty about it, so I refer you here to those messages for a description of everything that was done. This commit was SVN r11661. --- ompi/communicator/comm_dyn.c | 4 +- ompi/mca/btl/base/btl_base_select.c | 4 +- ompi/mca/btl/tcp/btl_tcp_component.c | 6 +- ompi/mca/btl/tcp/btl_tcp_endpoint.c | 6 +- ompi/mca/mtl/psm/Makefile.in | 53 +- ompi/mca/pml/base/pml_base_select.c | 6 +- ompi/runtime/ompi_mpi_abort.c | 19 +- ompi/tools/ompi_info/components.cc | 2 +- opal/include/opal/types.h | 4 +- .../paffinity/linux/paffinity_linux_module.c | 2 +- orte/dss/dss_arith.c | 3 +- orte/dss/dss_compare.c | 10 - orte/dss/dss_copy.c | 4 - orte/dss/dss_internal.h | 58 +- orte/dss/dss_open_close.c | 13 - orte/dss/dss_pack.c | 16 - orte/dss/dss_print.c | 22 - orte/dss/dss_size.c | 4 - orte/dss/dss_unpack.c | 91 -- orte/include/orte/orte_types.h | 4 - orte/mca/errmgr/base/Makefile.am | 2 + orte/mca/errmgr/base/base.h | 21 +- orte/mca/errmgr/base/errmgr_base_close.c | 6 + orte/mca/errmgr/base/errmgr_base_fns.c | 64 +- orte/mca/errmgr/base/errmgr_base_open.c | 22 +- orte/mca/errmgr/base/errmgr_base_receive.c | 162 +++ orte/mca/errmgr/base/errmgr_base_select.c | 15 +- orte/mca/errmgr/base/errmgr_private.h | 82 ++ orte/mca/errmgr/bproc/Makefile.am | 51 + .../bproc_orted => errmgr/bproc}/configure.m4 | 20 +- orte/mca/errmgr/bproc/configure.params | 23 + orte/mca/errmgr/bproc/errmgr_bproc.c | 223 ++++ orte/mca/errmgr/bproc/errmgr_bproc.h | 81 ++ .../mca/errmgr/bproc/errmgr_bproc_component.c | 164 +++ orte/mca/errmgr/errmgr.h | 115 +- orte/mca/errmgr/hnp/Makefile.am | 46 + orte/mca/errmgr/hnp/configure.params | 23 + orte/mca/errmgr/hnp/errmgr_hnp.c | 205 ++++ orte/mca/errmgr/hnp/errmgr_hnp.h | 79 ++ orte/mca/errmgr/hnp/errmgr_hnp_component.c | 174 +++ orte/mca/errmgr/orted/Makefile.am | 46 + 
orte/mca/errmgr/orted/configure.params | 23 + orte/mca/errmgr/orted/errmgr_orted.c | 192 +++ orte/mca/errmgr/orted/errmgr_orted.h | 81 ++ .../mca/errmgr/orted/errmgr_orted_component.c | 164 +++ orte/mca/errmgr/proxy/Makefile.am | 46 + orte/mca/errmgr/proxy/configure.params | 23 + orte/mca/errmgr/proxy/errmgr_proxy.c | 187 +++ orte/mca/errmgr/proxy/errmgr_proxy.h | 81 ++ .../mca/errmgr/proxy/errmgr_proxy_component.c | 163 +++ .../replica/functional_layer/gpr_replica_fn.h | 1 + .../gpr_replica_messaging_fn.c | 11 +- .../ns_data_type_compare_fns.c | 113 +- orte/mca/ns/base/ns_base_open.c | 10 +- orte/mca/ns/ns_types.h | 20 +- orte/mca/ns/proxy/src/ns_proxy.c | 14 +- orte/mca/odls/Makefile.am | 42 + orte/mca/odls/base/Makefile.am | 33 + orte/mca/odls/base/base.h | 80 ++ .../base/data_type_support/odls_compare_fns.c | 31 + .../base/data_type_support/odls_copy_fns.c | 40 + .../base/data_type_support/odls_packing_fns.c | 42 + .../base/data_type_support/odls_print_fns.c | 45 + .../base/data_type_support/odls_release_fns.c | 30 + .../base/data_type_support/odls_size_fns.c | 30 + .../data_type_support/odls_unpacking_fns.c | 70 ++ orte/mca/odls/base/odls_base_close.c | 56 + orte/mca/odls/base/odls_base_open.c | 114 ++ orte/mca/odls/base/odls_base_select.c | 113 ++ orte/mca/odls/base/odls_private.h | 80 ++ .../bproc_orted => odls/bproc}/Makefile.am | 33 +- orte/mca/odls/bproc/configure.m4 | 38 + orte/mca/odls/bproc/configure.params | 23 + .../bproc/odls_bproc.c} | 361 +++--- orte/mca/odls/bproc/odls_bproc.h | 108 ++ .../bproc/odls_bproc_component.c} | 89 +- orte/mca/odls/default/Makefile.am | 48 + .../{pls/fork => odls/default}/configure.m4 | 5 +- .../fork => odls/default}/configure.params | 2 +- .../default/help-odls-default.txt} | 23 +- orte/mca/odls/default/odls_default.h | 105 ++ .../mca/odls/default/odls_default_component.c | 163 +++ orte/mca/odls/default/odls_default_module.c | 1037 +++++++++++++++++ orte/mca/odls/odls.h | 130 +++ orte/mca/odls/odls_types.h | 51 + 
orte/mca/odls/windows/.ompi_ignore | 0 .../{pls/process => odls/windows}/Makefile.am | 0 .../process => odls/windows}/configure.m4 | 4 +- .../process => odls/windows}/configure.params | 2 +- .../windows/help-odls-windows.txt} | 0 .../windows/odls_windows.h} | 0 .../windows/odls_windows_component.c} | 0 .../windows/odls_windows_module.c} | 0 orte/mca/oob/base/oob_base_xcast.c | 2 +- orte/mca/oob/oob_types.h | 17 - orte/mca/oob/tcp/oob_tcp.c | 303 ++++- orte/mca/oob/tcp/oob_tcp.h | 27 +- orte/mca/oob/tcp/oob_tcp_peer.c | 8 +- orte/mca/pls/Makefile.am | 2 +- orte/mca/pls/base/Makefile.am | 8 +- orte/mca/pls/base/base.h | 92 +- orte/mca/pls/base/pls_base_close.c | 31 +- orte/mca/pls/base/pls_base_dmn_registry_fns.c | 217 ++++ .../pls/base/pls_base_general_support_fns.c | 56 + orte/mca/pls/base/pls_base_open.c | 13 +- orte/mca/pls/base/pls_base_orted_cmds.c | 225 ++++ orte/mca/pls/base/pls_base_proxy.c | 418 ------- orte/mca/pls/base/pls_base_receive.c | 211 ++++ orte/mca/pls/base/pls_base_select.c | 204 +--- orte/mca/pls/base/pls_private.h | 102 ++ orte/mca/pls/bproc/Makefile.am | 1 + orte/mca/pls/bproc/pls_bproc.c | 166 ++- orte/mca/pls/bproc/pls_bproc.h | 39 +- orte/mca/pls/bproc/pls_bproc_component.c | 5 +- .../pls_bproc_state.c} | 106 +- orte/mca/pls/bproc_orted/pls_bproc_orted.h | 87 -- orte/mca/pls/cnos/Makefile.am | 49 + orte/mca/pls/cnos/configure.m4 | 37 + orte/mca/pls/cnos/configure.params | 21 + orte/mca/pls/cnos/pls_cnos.c | 152 +++ orte/mca/pls/cnos/pls_cnos.h | 38 + orte/mca/pls/cnos/pls_cnos_component.c | 97 ++ orte/mca/pls/fork/pls_fork.h | 77 -- orte/mca/pls/fork/pls_fork_component.c | 141 --- orte/mca/pls/fork/pls_fork_module.c | 871 -------------- orte/mca/pls/gridengine/pls_gridengine.h | 3 +- .../pls/gridengine/pls_gridengine_component.c | 33 +- .../pls/gridengine/pls_gridengine_module.c | 234 ++-- orte/mca/pls/pls.h | 53 +- orte/mca/pls/pls_types.h | 36 + orte/mca/pls/poe/pls_poe_component.c | 12 +- orte/mca/pls/poe/pls_poe_module.c | 47 +- 
orte/mca/pls/{fork => proxy}/Makefile.am | 28 +- .../{bproc_orted => proxy}/configure.params | 2 +- orte/mca/pls/proxy/pls_proxy.c | 442 +++++++ orte/mca/pls/proxy/pls_proxy.h | 62 + orte/mca/pls/proxy/pls_proxy_component.c | 123 ++ orte/mca/pls/rsh/pls_rsh.h | 1 + orte/mca/pls/rsh/pls_rsh_component.c | 26 +- orte/mca/pls/rsh/pls_rsh_module.c | 179 ++- orte/mca/pls/slurm/pls_slurm.h | 2 +- orte/mca/pls/slurm/pls_slurm_component.c | 31 +- orte/mca/pls/slurm/pls_slurm_module.c | 146 ++- orte/mca/pls/tm/pls_tm.h | 2 +- orte/mca/pls/tm/pls_tm_component.c | 26 +- orte/mca/pls/tm/pls_tm_module.c | 341 ++++-- orte/mca/pls/xgrid/.ompi_ignore | 0 orte/mca/pls/xgrid/src/pls_xgrid_component.m | 7 + orte/mca/ras/base/Makefile.am | 5 +- orte/mca/ras/base/base.h | 24 +- .../ras_data_type_compare_fns.c | 2 +- .../ras_data_type_copy_fns.c | 2 +- .../ras_data_type_packing_fns.c | 2 +- .../ras_data_type_print_fns.c | 2 +- .../ras_data_type_release_fns.c | 2 +- .../ras_data_type_size_fns.c | 2 +- .../ras_data_type_unpacking_fns.c | 2 +- orte/mca/ras/base/ras_base_alloc.c | 4 +- orte/mca/ras/base/ras_base_allocate.c | 17 +- orte/mca/ras/base/ras_base_close.c | 12 +- orte/mca/ras/base/ras_base_find_available.c | 79 +- orte/mca/ras/base/ras_base_no_ops.c | 58 + orte/mca/ras/base/ras_base_node.c | 16 +- orte/mca/ras/base/ras_base_node.h | 72 -- orte/mca/ras/base/ras_base_open.c | 48 +- orte/mca/ras/base/ras_base_receive.c | 131 +++ orte/mca/ras/base/ras_private.h | 139 +++ orte/mca/ras/bjs/ras_bjs.c | 30 +- orte/mca/ras/bjs/ras_bjs_component.c | 9 +- .../ras/dash_host/ras_dash_host_component.c | 4 +- orte/mca/ras/dash_host/ras_dash_host_module.c | 23 +- .../ras/gridengine/ras_gridengine_component.c | 9 +- .../ras/gridengine/ras_gridengine_module.c | 44 +- .../mca/ras/hostfile/ras_hostfile_component.c | 4 +- orte/mca/ras/hostfile/ras_hostfile_module.c | 12 +- .../ras/localhost/ras_localhost_component.c | 4 +- orte/mca/ras/localhost/ras_localhost_module.c | 12 +- 
orte/mca/ras/lsf_bproc/ras_lsf_bproc.c | 18 +- .../ras/lsf_bproc/ras_lsf_bproc_component.c | 9 +- orte/mca/ras/poe/ras_poe_component.c | 9 +- orte/mca/ras/poe/ras_poe_module.c | 19 +- orte/mca/ras/proxy/Makefile.am | 51 + orte/mca/ras/proxy/configure.params | 23 + orte/mca/ras/proxy/ras_proxy.c | 170 +++ orte/mca/ras/proxy/ras_proxy.h | 58 + orte/mca/ras/proxy/ras_proxy_component.c | 123 ++ orte/mca/ras/ras.h | 46 +- orte/mca/ras/slurm/ras_slurm.h | 2 +- orte/mca/ras/slurm/ras_slurm_component.c | 12 +- orte/mca/ras/slurm/ras_slurm_module.c | 21 +- orte/mca/ras/tm/ras_tm.h | 2 +- orte/mca/ras/tm/ras_tm_component.c | 12 +- orte/mca/ras/tm/ras_tm_module.c | 21 +- orte/mca/ras/xgrid/src/ras_xgrid.h | 2 +- orte/mca/ras/xgrid/src/ras_xgrid_component.c | 12 +- orte/mca/ras/xgrid/src/ras_xgrid_module.c | 25 +- orte/mca/rds/base/Makefile.am | 2 + orte/mca/rds/base/base.h | 13 +- orte/mca/rds/base/rds_base_close.c | 12 +- orte/mca/rds/base/rds_base_no_ops.c | 39 + orte/mca/rds/base/rds_base_open.c | 38 +- orte/mca/rds/base/rds_base_query.c | 2 + orte/mca/rds/base/rds_base_receive.c | 111 ++ orte/mca/rds/base/rds_base_registry_fns.c | 1 + orte/mca/rds/base/rds_base_select.c | 7 +- orte/mca/rds/base/rds_private.h | 73 ++ orte/mca/rds/hostfile/rds_hostfile.c | 38 +- .../mca/rds/hostfile/rds_hostfile_component.c | 35 +- orte/mca/rds/proxy/Makefile.am | 51 + orte/mca/rds/proxy/configure.params | 23 + orte/mca/rds/proxy/rds_proxy.c | 100 ++ orte/mca/rds/proxy/rds_proxy.h | 57 + orte/mca/rds/proxy/rds_proxy_component.c | 118 ++ orte/mca/rds/rds.h | 39 +- orte/mca/rds/resfile/rds_resfile.c | 18 +- orte/mca/rds/resfile/rds_resfile_component.c | 35 +- orte/mca/rmaps/base/Makefile.am | 9 +- orte/mca/rmaps/base/base.h | 23 +- orte/mca/rmaps/base/rmaps_base_close.c | 34 +- orte/mca/rmaps/base/rmaps_base_find_avail.c | 129 ++ orte/mca/rmaps/base/rmaps_base_map.c | 131 +-- orte/mca/rmaps/base/rmaps_base_map.h | 111 -- ...aps_base_select.c => rmaps_base_map_job.c} | 44 +- 
orte/mca/rmaps/base/rmaps_base_no_ops.c | 35 + orte/mca/rmaps/base/rmaps_base_node.c | 8 +- orte/mca/rmaps/base/rmaps_base_node.h | 64 - orte/mca/rmaps/base/rmaps_base_open.c | 125 +- orte/mca/rmaps/base/rmaps_base_receive.c | 154 +++ orte/mca/rmaps/base/rmaps_private.h | 127 ++ orte/mca/rmaps/proxy/Makefile.am | 46 + orte/mca/rmaps/proxy/configure.params | 23 + orte/mca/rmaps/proxy/rmaps_proxy.c | 114 ++ orte/mca/rmaps/proxy/rmaps_proxy.h | 70 ++ orte/mca/rmaps/proxy/rmaps_proxy_component.c | 151 +++ orte/mca/rmaps/rmaps.h | 35 +- orte/mca/rmaps/rmaps_types.h | 92 ++ orte/mca/rmaps/round_robin/rmaps_rr.c | 15 +- .../rmaps/round_robin/rmaps_rr_component.c | 18 +- orte/mca/rmgr/base/Makefile.am | 6 +- orte/mca/rmgr/base/base.h | 144 --- .../rmgr_data_type_compare_fns.c | 2 +- .../rmgr_data_type_copy_fns.c | 2 +- .../rmgr_data_type_packing_fns.c | 2 +- .../rmgr_data_type_print_fns.c | 2 +- .../rmgr_data_type_release_fns.c | 2 +- .../rmgr_data_type_size_fns.c | 2 +- .../rmgr_data_type_unpacking_fns.c | 2 +- .../base/rmgr_base_check_context.c} | 12 +- orte/mca/rmgr/base/rmgr_base_context.c | 11 +- orte/mca/rmgr/base/rmgr_base_open.c | 25 +- orte/mca/rmgr/base/rmgr_base_pack.c | 218 ---- orte/mca/rmgr/base/rmgr_base_receive.c | 205 ++++ orte/mca/rmgr/base/rmgr_base_select.c | 5 + orte/mca/rmgr/base/rmgr_base_stage_gate.c | 273 +---- orte/mca/rmgr/base/rmgr_base_stubs.c | 56 +- orte/mca/rmgr/base/rmgr_base_unpack.c | 283 ----- .../rmgr/base/rmgr_base_vpid_support_fns.c | 152 +++ orte/mca/rmgr/base/rmgr_private.h | 164 +++ orte/mca/rmgr/cnos/rmgr_cnos.c | 208 ++-- orte/mca/rmgr/cnos/rmgr_cnos_component.c | 7 +- orte/mca/rmgr/proxy/rmgr_proxy.c | 344 +----- orte/mca/rmgr/proxy/rmgr_proxy_component.c | 10 +- orte/mca/rmgr/rmgr.h | 201 ++-- orte/mca/rmgr/rmgr_types.h | 21 +- orte/mca/rmgr/urm/rmgr_urm.c | 308 ++--- orte/mca/rmgr/urm/rmgr_urm.h | 6 - orte/mca/rmgr/urm/rmgr_urm_component.c | 178 +-- orte/mca/rml/rml_types.h | 27 +- orte/mca/schema/schema_types.h | 13 +- 
orte/mca/smr/Makefile.am | 4 +- orte/mca/smr/base/Makefile.am | 1 + orte/mca/smr/base/smr_base_local_functions.c | 14 +- orte/mca/smr/base/smr_base_open.c | 4 + orte/mca/smr/base/smr_base_set_proc_state.c | 10 +- orte/mca/smr/base/smr_base_trig_init_fns.c | 448 +++++++ orte/mca/smr/base/smr_private.h | 22 + orte/mca/smr/bproc/smr_bproc.c | 4 + orte/mca/smr/smr.h | 88 +- orte/mca/smr/smr_types.h | 43 +- orte/runtime/orte_abort.c | 77 +- orte/runtime/orte_init_stage1.c | 106 +- orte/runtime/orte_system_finalize.c | 23 +- orte/runtime/runtime.h | 14 +- orte/test/mpi/Makefile | 2 +- orte/test/mpi/abort.c | 25 + orte/test/mpi/multi_abort.c | 25 + orte/tools/console/orteconsole.h | 10 + orte/tools/orte-ps/orte-ps.c | 10 +- orte/tools/orted/Makefile.am | 4 +- orte/tools/orted/orted.c | 474 +++++--- orte/tools/orted/orted.h | 28 +- orte/tools/orteprobe/orteprobe.c | 19 +- orte/tools/orterun/help-orterun.txt | 4 + orte/tools/orterun/orterun.c | 72 +- orte/tools/orterun/totalview.c | 2 +- orte/util/proc_info.c | 6 + 296 files changed, 13934 insertions(+), 6042 deletions(-) create mode 100644 orte/mca/errmgr/base/errmgr_base_receive.c create mode 100644 orte/mca/errmgr/base/errmgr_private.h create mode 100644 orte/mca/errmgr/bproc/Makefile.am rename orte/mca/{pls/bproc_orted => errmgr/bproc}/configure.m4 (62%) create mode 100644 orte/mca/errmgr/bproc/configure.params create mode 100644 orte/mca/errmgr/bproc/errmgr_bproc.c create mode 100644 orte/mca/errmgr/bproc/errmgr_bproc.h create mode 100644 orte/mca/errmgr/bproc/errmgr_bproc_component.c create mode 100644 orte/mca/errmgr/hnp/Makefile.am create mode 100644 orte/mca/errmgr/hnp/configure.params create mode 100644 orte/mca/errmgr/hnp/errmgr_hnp.c create mode 100644 orte/mca/errmgr/hnp/errmgr_hnp.h create mode 100644 orte/mca/errmgr/hnp/errmgr_hnp_component.c create mode 100644 orte/mca/errmgr/orted/Makefile.am create mode 100644 orte/mca/errmgr/orted/configure.params create mode 100644 orte/mca/errmgr/orted/errmgr_orted.c 
create mode 100644 orte/mca/errmgr/orted/errmgr_orted.h create mode 100644 orte/mca/errmgr/orted/errmgr_orted_component.c create mode 100644 orte/mca/errmgr/proxy/Makefile.am create mode 100644 orte/mca/errmgr/proxy/configure.params create mode 100644 orte/mca/errmgr/proxy/errmgr_proxy.c create mode 100644 orte/mca/errmgr/proxy/errmgr_proxy.h create mode 100644 orte/mca/errmgr/proxy/errmgr_proxy_component.c create mode 100644 orte/mca/odls/Makefile.am create mode 100644 orte/mca/odls/base/Makefile.am create mode 100644 orte/mca/odls/base/base.h create mode 100755 orte/mca/odls/base/data_type_support/odls_compare_fns.c create mode 100755 orte/mca/odls/base/data_type_support/odls_copy_fns.c create mode 100644 orte/mca/odls/base/data_type_support/odls_packing_fns.c create mode 100755 orte/mca/odls/base/data_type_support/odls_print_fns.c create mode 100644 orte/mca/odls/base/data_type_support/odls_release_fns.c create mode 100755 orte/mca/odls/base/data_type_support/odls_size_fns.c create mode 100644 orte/mca/odls/base/data_type_support/odls_unpacking_fns.c create mode 100644 orte/mca/odls/base/odls_base_close.c create mode 100644 orte/mca/odls/base/odls_base_open.c create mode 100644 orte/mca/odls/base/odls_base_select.c create mode 100644 orte/mca/odls/base/odls_private.h rename orte/mca/{pls/bproc_orted => odls/bproc}/Makefile.am (63%) create mode 100644 orte/mca/odls/bproc/configure.m4 create mode 100644 orte/mca/odls/bproc/configure.params rename orte/mca/{pls/bproc_orted/pls_bproc_orted.c => odls/bproc/odls_bproc.c} (55%) create mode 100644 orte/mca/odls/bproc/odls_bproc.h rename orte/mca/{pls/bproc_orted/pls_bproc_orted_component.c => odls/bproc/odls_bproc_component.c} (51%) create mode 100644 orte/mca/odls/default/Makefile.am rename orte/mca/{pls/fork => odls/default}/configure.m4 (88%) rename orte/mca/{pls/fork => odls/default}/configure.params (94%) rename orte/mca/{pls/fork/help-orte-pls-fork.txt => odls/default/help-odls-default.txt} (70%) create mode 
100644 orte/mca/odls/default/odls_default.h create mode 100644 orte/mca/odls/default/odls_default_component.c create mode 100644 orte/mca/odls/default/odls_default_module.c create mode 100644 orte/mca/odls/odls.h create mode 100644 orte/mca/odls/odls_types.h create mode 100644 orte/mca/odls/windows/.ompi_ignore rename orte/mca/{pls/process => odls/windows}/Makefile.am (100%) mode change 100644 => 100755 rename orte/mca/{pls/process => odls/windows}/configure.m4 (78%) mode change 100644 => 100755 rename orte/mca/{pls/process => odls/windows}/configure.params (88%) mode change 100644 => 100755 rename orte/mca/{pls/process/help-orte-pls-process.txt => odls/windows/help-odls-windows.txt} (100%) mode change 100644 => 100755 rename orte/mca/{pls/process/pls_process.h => odls/windows/odls_windows.h} (100%) mode change 100644 => 100755 rename orte/mca/{pls/process/pls_process_component.c => odls/windows/odls_windows_component.c} (100%) mode change 100644 => 100755 rename orte/mca/{pls/process/pls_process_module.c => odls/windows/odls_windows_module.c} (100%) mode change 100644 => 100755 create mode 100644 orte/mca/pls/base/pls_base_dmn_registry_fns.c create mode 100644 orte/mca/pls/base/pls_base_general_support_fns.c create mode 100644 orte/mca/pls/base/pls_base_orted_cmds.c delete mode 100644 orte/mca/pls/base/pls_base_proxy.c create mode 100644 orte/mca/pls/base/pls_base_receive.c create mode 100644 orte/mca/pls/base/pls_private.h rename orte/mca/pls/{base/pls_base_state.c => bproc/pls_bproc_state.c} (77%) delete mode 100644 orte/mca/pls/bproc_orted/pls_bproc_orted.h create mode 100644 orte/mca/pls/cnos/Makefile.am create mode 100644 orte/mca/pls/cnos/configure.m4 create mode 100644 orte/mca/pls/cnos/configure.params create mode 100644 orte/mca/pls/cnos/pls_cnos.c create mode 100644 orte/mca/pls/cnos/pls_cnos.h create mode 100644 orte/mca/pls/cnos/pls_cnos_component.c delete mode 100644 orte/mca/pls/fork/pls_fork.h delete mode 100644 
orte/mca/pls/fork/pls_fork_component.c delete mode 100644 orte/mca/pls/fork/pls_fork_module.c create mode 100644 orte/mca/pls/pls_types.h rename orte/mca/pls/{fork => proxy}/Makefile.am (72%) rename orte/mca/pls/{bproc_orted => proxy}/configure.params (95%) create mode 100644 orte/mca/pls/proxy/pls_proxy.c create mode 100644 orte/mca/pls/proxy/pls_proxy.h create mode 100644 orte/mca/pls/proxy/pls_proxy_component.c create mode 100644 orte/mca/pls/xgrid/.ompi_ignore create mode 100644 orte/mca/ras/base/ras_base_no_ops.c delete mode 100644 orte/mca/ras/base/ras_base_node.h create mode 100644 orte/mca/ras/base/ras_base_receive.c create mode 100644 orte/mca/ras/base/ras_private.h create mode 100644 orte/mca/ras/proxy/Makefile.am create mode 100644 orte/mca/ras/proxy/configure.params create mode 100644 orte/mca/ras/proxy/ras_proxy.c create mode 100644 orte/mca/ras/proxy/ras_proxy.h create mode 100644 orte/mca/ras/proxy/ras_proxy_component.c create mode 100644 orte/mca/rds/base/rds_base_no_ops.c create mode 100644 orte/mca/rds/base/rds_base_receive.c create mode 100644 orte/mca/rds/base/rds_private.h create mode 100644 orte/mca/rds/proxy/Makefile.am create mode 100644 orte/mca/rds/proxy/configure.params create mode 100644 orte/mca/rds/proxy/rds_proxy.c create mode 100644 orte/mca/rds/proxy/rds_proxy.h create mode 100644 orte/mca/rds/proxy/rds_proxy_component.c create mode 100644 orte/mca/rmaps/base/rmaps_base_find_avail.c delete mode 100644 orte/mca/rmaps/base/rmaps_base_map.h rename orte/mca/rmaps/base/{rmaps_base_select.c => rmaps_base_map_job.c} (65%) create mode 100644 orte/mca/rmaps/base/rmaps_base_no_ops.c delete mode 100644 orte/mca/rmaps/base/rmaps_base_node.h create mode 100644 orte/mca/rmaps/base/rmaps_base_receive.c create mode 100644 orte/mca/rmaps/base/rmaps_private.h create mode 100644 orte/mca/rmaps/proxy/Makefile.am create mode 100644 orte/mca/rmaps/proxy/configure.params create mode 100644 orte/mca/rmaps/proxy/rmaps_proxy.c create mode 100644 
orte/mca/rmaps/proxy/rmaps_proxy.h create mode 100644 orte/mca/rmaps/proxy/rmaps_proxy_component.c rename orte/mca/{pls/base/pls_base_context.c => rmgr/base/rmgr_base_check_context.c} (95%) delete mode 100644 orte/mca/rmgr/base/rmgr_base_pack.c create mode 100644 orte/mca/rmgr/base/rmgr_base_receive.c delete mode 100644 orte/mca/rmgr/base/rmgr_base_unpack.c create mode 100644 orte/mca/rmgr/base/rmgr_base_vpid_support_fns.c create mode 100644 orte/mca/rmgr/base/rmgr_private.h create mode 100644 orte/mca/smr/base/smr_base_trig_init_fns.c create mode 100644 orte/test/mpi/abort.c create mode 100644 orte/test/mpi/multi_abort.c diff --git a/ompi/communicator/comm_dyn.c b/ompi/communicator/comm_dyn.c index ac9cbdf651..30beec0cac 100644 --- a/ompi/communicator/comm_dyn.c +++ b/ompi/communicator/comm_dyn.c @@ -373,7 +373,7 @@ ompi_comm_start_processes(int count, char **array_of_commands, * later override this value by providing an MPI_Info value. for now, though, * let's get the default value off the registry */ - if (ORTE_SUCCESS != (rc = orte_rmgr_base_get_app_context(orte_process_info.my_name->jobid, &apps, &num_apps))) { + if (ORTE_SUCCESS != (rc = orte_rmgr.get_app_context(orte_process_info.my_name->jobid, &apps, &num_apps))) { ORTE_ERROR_LOG(rc); return rc; } @@ -533,7 +533,7 @@ ompi_comm_start_processes(int count, char **array_of_commands, if (NULL != base_prefix) free(base_prefix); /* spawn procs */ - if (ORTE_SUCCESS != (rc = orte_rmgr.spawn(apps, count, &new_jobid, NULL, ORTE_PROC_STATE_NONE))) { + if (ORTE_SUCCESS != (rc = orte_rmgr.spawn_job(apps, count, &new_jobid, NULL, ORTE_PROC_STATE_NONE))) { ORTE_ERROR_LOG(rc); opal_progress_event_decrement(); return MPI_ERR_SPAWN; diff --git a/ompi/mca/btl/base/btl_base_select.c b/ompi/mca/btl/base/btl_base_select.c index 7fe99905b8..5b4053aa6e 100644 --- a/ompi/mca/btl/base/btl_base_select.c +++ b/ompi/mca/btl/base/btl_base_select.c @@ -27,7 +27,7 @@ #include "ompi/mca/pml/pml.h" #include "ompi/mca/btl/btl.h" #include 
"ompi/mca/btl/base/base.h" -#include "orte/runtime/runtime.h" +#include "orte/mca/errmgr/errmgr.h" OBJ_CLASS_INSTANCE( mca_btl_base_selected_module_t, @@ -151,7 +151,7 @@ int mca_btl_base_select(bool enable_progress_threads, if (0 == opal_list_get_size(&mca_btl_base_modules_initialized)) { opal_show_help("help-mca-base.txt", "find-available:none-found", true, "btl"); - orte_abort(1, ""); + orte_errmgr.error_detected(1, NULL); } return OMPI_SUCCESS; } diff --git a/ompi/mca/btl/tcp/btl_tcp_component.c b/ompi/mca/btl/tcp/btl_tcp_component.c index 4271164e95..f7bcb1ae1c 100644 --- a/ompi/mca/btl/tcp/btl_tcp_component.c +++ b/ompi/mca/btl/tcp/btl_tcp_component.c @@ -391,7 +391,7 @@ static int mca_btl_tcp_component_create_listen(void) { int flags; struct sockaddr_in inaddr; - ompi_socklen_t addrlen; + opal_socklen_t addrlen; /* create a listen socket for incoming connections */ mca_btl_tcp_component.tcp_listen_sd = socket(AF_INET, SOCK_STREAM, 0); @@ -556,7 +556,7 @@ int mca_btl_tcp_component_control(int param, void* value, size_t size) static void mca_btl_tcp_component_accept(void) { while(true) { - ompi_socklen_t addrlen = sizeof(struct sockaddr_in); + opal_socklen_t addrlen = sizeof(struct sockaddr_in); struct sockaddr_in addr; mca_btl_tcp_event_t *event; int sd = accept(mca_btl_tcp_component.tcp_listen_sd, (struct sockaddr*)&addr, &addrlen); @@ -588,7 +588,7 @@ static void mca_btl_tcp_component_recv_handler(int sd, short flags, void* user) struct sockaddr_in addr; int retval; mca_btl_tcp_proc_t* btl_proc; - ompi_socklen_t addr_len = sizeof(addr); + opal_socklen_t addr_len = sizeof(addr); mca_btl_tcp_event_t *event = (mca_btl_tcp_event_t *)user; /* accept new connections on the listen socket */ diff --git a/ompi/mca/btl/tcp/btl_tcp_endpoint.c b/ompi/mca/btl/tcp/btl_tcp_endpoint.c index 5c5b1a2347..1a5c38267e 100644 --- a/ompi/mca/btl/tcp/btl_tcp_endpoint.c +++ b/ompi/mca/btl/tcp/btl_tcp_endpoint.c @@ -133,8 +133,8 @@ static void 
mca_btl_tcp_endpoint_dump(mca_btl_base_endpoint_t* btl_endpoint, con char dst[64]; int sndbuf,rcvbuf,nodelay,flags; struct sockaddr_in inaddr; - ompi_socklen_t obtlen; - ompi_socklen_t addrlen = sizeof(struct sockaddr_in); + opal_socklen_t obtlen; + opal_socklen_t addrlen = sizeof(struct sockaddr_in); getsockname(btl_endpoint->endpoint_sd, (struct sockaddr*)&inaddr, &addrlen); sprintf(src, "%s", inet_ntoa(inaddr.sin_addr)); @@ -553,7 +553,7 @@ static int mca_btl_tcp_endpoint_start_connect(mca_btl_base_endpoint_t* btl_endpo static void mca_btl_tcp_endpoint_complete_connect(mca_btl_base_endpoint_t* btl_endpoint) { int so_error = 0; - ompi_socklen_t so_length = sizeof(so_error); + opal_socklen_t so_length = sizeof(so_error); /* unregister from receiving event notifications */ opal_event_del(&btl_endpoint->endpoint_send_event); diff --git a/ompi/mca/mtl/psm/Makefile.in b/ompi/mca/mtl/psm/Makefile.in index 49aa27b89c..53f06758f4 100644 --- a/ompi/mca/mtl/psm/Makefile.in +++ b/ompi/mca/mtl/psm/Makefile.in @@ -148,17 +148,16 @@ am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \ $(top_srcdir)/opal/mca/timer/linux/configure.m4 \ $(top_srcdir)/opal/mca/timer/solaris/configure.m4 \ $(top_srcdir)/opal/mca/timer/windows/configure.m4 \ + $(top_srcdir)/orte/mca/errmgr/bproc/configure.m4 \ + $(top_srcdir)/orte/mca/odls/bproc/configure.m4 \ + $(top_srcdir)/orte/mca/odls/default/configure.m4 \ $(top_srcdir)/orte/mca/oob/tcp/configure.m4 \ $(top_srcdir)/orte/mca/pls/bproc/configure.m4 \ - $(top_srcdir)/orte/mca/pls/bproc_orted/configure.m4 \ - $(top_srcdir)/orte/mca/pls/fork/configure.m4 \ $(top_srcdir)/orte/mca/pls/gridengine/configure.m4 \ $(top_srcdir)/orte/mca/pls/poe/configure.m4 \ - $(top_srcdir)/orte/mca/pls/process/configure.m4 \ $(top_srcdir)/orte/mca/pls/rsh/configure.m4 \ $(top_srcdir)/orte/mca/pls/slurm/configure.m4 \ $(top_srcdir)/orte/mca/pls/tm/configure.m4 \ - $(top_srcdir)/orte/mca/pls/xgrid/configure.m4 \ $(top_srcdir)/orte/mca/ras/bjs/configure.m4 \ 
$(top_srcdir)/orte/mca/ras/gridengine/configure.m4 \ $(top_srcdir)/orte/mca/ras/lsf_bproc/configure.m4 \ @@ -416,6 +415,13 @@ MCA_ns_DSO_SUBDIRS = @MCA_ns_DSO_SUBDIRS@ MCA_ns_STATIC_COMPONENTS = @MCA_ns_STATIC_COMPONENTS@ MCA_ns_STATIC_LTLIBS = @MCA_ns_STATIC_LTLIBS@ MCA_ns_STATIC_SUBDIRS = @MCA_ns_STATIC_SUBDIRS@ +MCA_odls_ALL_COMPONENTS = @MCA_odls_ALL_COMPONENTS@ +MCA_odls_ALL_SUBDIRS = @MCA_odls_ALL_SUBDIRS@ +MCA_odls_DSO_COMPONENTS = @MCA_odls_DSO_COMPONENTS@ +MCA_odls_DSO_SUBDIRS = @MCA_odls_DSO_SUBDIRS@ +MCA_odls_STATIC_COMPONENTS = @MCA_odls_STATIC_COMPONENTS@ +MCA_odls_STATIC_LTLIBS = @MCA_odls_STATIC_LTLIBS@ +MCA_odls_STATIC_SUBDIRS = @MCA_odls_STATIC_SUBDIRS@ MCA_ompi_FRAMEWORKS = @MCA_ompi_FRAMEWORKS@ MCA_ompi_FRAMEWORKS_SUBDIRS = @MCA_ompi_FRAMEWORKS_SUBDIRS@ MCA_ompi_FRAMEWORK_COMPONENT_ALL_SUBDIRS = @MCA_ompi_FRAMEWORK_COMPONENT_ALL_SUBDIRS@ @@ -609,6 +615,14 @@ OMPI_BUILD_common_portals_DSO_FALSE = @OMPI_BUILD_common_portals_DSO_FALSE@ OMPI_BUILD_common_portals_DSO_TRUE = @OMPI_BUILD_common_portals_DSO_TRUE@ OMPI_BUILD_common_sm_DSO_FALSE = @OMPI_BUILD_common_sm_DSO_FALSE@ OMPI_BUILD_common_sm_DSO_TRUE = @OMPI_BUILD_common_sm_DSO_TRUE@ +OMPI_BUILD_errmgr_bproc_DSO_FALSE = @OMPI_BUILD_errmgr_bproc_DSO_FALSE@ +OMPI_BUILD_errmgr_bproc_DSO_TRUE = @OMPI_BUILD_errmgr_bproc_DSO_TRUE@ +OMPI_BUILD_errmgr_hnp_DSO_FALSE = @OMPI_BUILD_errmgr_hnp_DSO_FALSE@ +OMPI_BUILD_errmgr_hnp_DSO_TRUE = @OMPI_BUILD_errmgr_hnp_DSO_TRUE@ +OMPI_BUILD_errmgr_orted_DSO_FALSE = @OMPI_BUILD_errmgr_orted_DSO_FALSE@ +OMPI_BUILD_errmgr_orted_DSO_TRUE = @OMPI_BUILD_errmgr_orted_DSO_TRUE@ +OMPI_BUILD_errmgr_proxy_DSO_FALSE = @OMPI_BUILD_errmgr_proxy_DSO_FALSE@ +OMPI_BUILD_errmgr_proxy_DSO_TRUE = @OMPI_BUILD_errmgr_proxy_DSO_TRUE@ OMPI_BUILD_gpr_null_DSO_FALSE = @OMPI_BUILD_gpr_null_DSO_FALSE@ OMPI_BUILD_gpr_null_DSO_TRUE = @OMPI_BUILD_gpr_null_DSO_TRUE@ OMPI_BUILD_gpr_proxy_DSO_FALSE = @OMPI_BUILD_gpr_proxy_DSO_FALSE@ @@ -651,6 +665,10 @@ OMPI_BUILD_ns_proxy_DSO_FALSE = 
@OMPI_BUILD_ns_proxy_DSO_FALSE@ OMPI_BUILD_ns_proxy_DSO_TRUE = @OMPI_BUILD_ns_proxy_DSO_TRUE@ OMPI_BUILD_ns_replica_DSO_FALSE = @OMPI_BUILD_ns_replica_DSO_FALSE@ OMPI_BUILD_ns_replica_DSO_TRUE = @OMPI_BUILD_ns_replica_DSO_TRUE@ +OMPI_BUILD_odls_bproc_DSO_FALSE = @OMPI_BUILD_odls_bproc_DSO_FALSE@ +OMPI_BUILD_odls_bproc_DSO_TRUE = @OMPI_BUILD_odls_bproc_DSO_TRUE@ +OMPI_BUILD_odls_default_DSO_FALSE = @OMPI_BUILD_odls_default_DSO_FALSE@ +OMPI_BUILD_odls_default_DSO_TRUE = @OMPI_BUILD_odls_default_DSO_TRUE@ OMPI_BUILD_oob_tcp_DSO_FALSE = @OMPI_BUILD_oob_tcp_DSO_FALSE@ OMPI_BUILD_oob_tcp_DSO_TRUE = @OMPI_BUILD_oob_tcp_DSO_TRUE@ OMPI_BUILD_osc_pt2pt_DSO_FALSE = @OMPI_BUILD_osc_pt2pt_DSO_FALSE@ @@ -665,24 +683,18 @@ OMPI_BUILD_paffinity_windows_DSO_FALSE = @OMPI_BUILD_paffinity_windows_DSO_FALSE OMPI_BUILD_paffinity_windows_DSO_TRUE = @OMPI_BUILD_paffinity_windows_DSO_TRUE@ OMPI_BUILD_pls_bproc_DSO_FALSE = @OMPI_BUILD_pls_bproc_DSO_FALSE@ OMPI_BUILD_pls_bproc_DSO_TRUE = @OMPI_BUILD_pls_bproc_DSO_TRUE@ -OMPI_BUILD_pls_bproc_orted_DSO_FALSE = @OMPI_BUILD_pls_bproc_orted_DSO_FALSE@ -OMPI_BUILD_pls_bproc_orted_DSO_TRUE = @OMPI_BUILD_pls_bproc_orted_DSO_TRUE@ -OMPI_BUILD_pls_fork_DSO_FALSE = @OMPI_BUILD_pls_fork_DSO_FALSE@ -OMPI_BUILD_pls_fork_DSO_TRUE = @OMPI_BUILD_pls_fork_DSO_TRUE@ OMPI_BUILD_pls_gridengine_DSO_FALSE = @OMPI_BUILD_pls_gridengine_DSO_FALSE@ OMPI_BUILD_pls_gridengine_DSO_TRUE = @OMPI_BUILD_pls_gridengine_DSO_TRUE@ OMPI_BUILD_pls_poe_DSO_FALSE = @OMPI_BUILD_pls_poe_DSO_FALSE@ OMPI_BUILD_pls_poe_DSO_TRUE = @OMPI_BUILD_pls_poe_DSO_TRUE@ -OMPI_BUILD_pls_process_DSO_FALSE = @OMPI_BUILD_pls_process_DSO_FALSE@ -OMPI_BUILD_pls_process_DSO_TRUE = @OMPI_BUILD_pls_process_DSO_TRUE@ +OMPI_BUILD_pls_proxy_DSO_FALSE = @OMPI_BUILD_pls_proxy_DSO_FALSE@ +OMPI_BUILD_pls_proxy_DSO_TRUE = @OMPI_BUILD_pls_proxy_DSO_TRUE@ OMPI_BUILD_pls_rsh_DSO_FALSE = @OMPI_BUILD_pls_rsh_DSO_FALSE@ OMPI_BUILD_pls_rsh_DSO_TRUE = @OMPI_BUILD_pls_rsh_DSO_TRUE@ OMPI_BUILD_pls_slurm_DSO_FALSE = 
@OMPI_BUILD_pls_slurm_DSO_FALSE@ OMPI_BUILD_pls_slurm_DSO_TRUE = @OMPI_BUILD_pls_slurm_DSO_TRUE@ OMPI_BUILD_pls_tm_DSO_FALSE = @OMPI_BUILD_pls_tm_DSO_FALSE@ OMPI_BUILD_pls_tm_DSO_TRUE = @OMPI_BUILD_pls_tm_DSO_TRUE@ -OMPI_BUILD_pls_xgrid_DSO_FALSE = @OMPI_BUILD_pls_xgrid_DSO_FALSE@ -OMPI_BUILD_pls_xgrid_DSO_TRUE = @OMPI_BUILD_pls_xgrid_DSO_TRUE@ OMPI_BUILD_pml_cm_DSO_FALSE = @OMPI_BUILD_pml_cm_DSO_FALSE@ OMPI_BUILD_pml_cm_DSO_TRUE = @OMPI_BUILD_pml_cm_DSO_TRUE@ OMPI_BUILD_pml_dr_DSO_FALSE = @OMPI_BUILD_pml_dr_DSO_FALSE@ @@ -703,6 +715,8 @@ OMPI_BUILD_ras_lsf_bproc_DSO_FALSE = @OMPI_BUILD_ras_lsf_bproc_DSO_FALSE@ OMPI_BUILD_ras_lsf_bproc_DSO_TRUE = @OMPI_BUILD_ras_lsf_bproc_DSO_TRUE@ OMPI_BUILD_ras_poe_DSO_FALSE = @OMPI_BUILD_ras_poe_DSO_FALSE@ OMPI_BUILD_ras_poe_DSO_TRUE = @OMPI_BUILD_ras_poe_DSO_TRUE@ +OMPI_BUILD_ras_proxy_DSO_FALSE = @OMPI_BUILD_ras_proxy_DSO_FALSE@ +OMPI_BUILD_ras_proxy_DSO_TRUE = @OMPI_BUILD_ras_proxy_DSO_TRUE@ OMPI_BUILD_ras_slurm_DSO_FALSE = @OMPI_BUILD_ras_slurm_DSO_FALSE@ OMPI_BUILD_ras_slurm_DSO_TRUE = @OMPI_BUILD_ras_slurm_DSO_TRUE@ OMPI_BUILD_ras_tm_DSO_FALSE = @OMPI_BUILD_ras_tm_DSO_FALSE@ @@ -715,8 +729,12 @@ OMPI_BUILD_rcache_vma_DSO_FALSE = @OMPI_BUILD_rcache_vma_DSO_FALSE@ OMPI_BUILD_rcache_vma_DSO_TRUE = @OMPI_BUILD_rcache_vma_DSO_TRUE@ OMPI_BUILD_rds_hostfile_DSO_FALSE = @OMPI_BUILD_rds_hostfile_DSO_FALSE@ OMPI_BUILD_rds_hostfile_DSO_TRUE = @OMPI_BUILD_rds_hostfile_DSO_TRUE@ +OMPI_BUILD_rds_proxy_DSO_FALSE = @OMPI_BUILD_rds_proxy_DSO_FALSE@ +OMPI_BUILD_rds_proxy_DSO_TRUE = @OMPI_BUILD_rds_proxy_DSO_TRUE@ OMPI_BUILD_rds_resfile_DSO_FALSE = @OMPI_BUILD_rds_resfile_DSO_FALSE@ OMPI_BUILD_rds_resfile_DSO_TRUE = @OMPI_BUILD_rds_resfile_DSO_TRUE@ +OMPI_BUILD_rmaps_proxy_DSO_FALSE = @OMPI_BUILD_rmaps_proxy_DSO_FALSE@ +OMPI_BUILD_rmaps_proxy_DSO_TRUE = @OMPI_BUILD_rmaps_proxy_DSO_TRUE@ OMPI_BUILD_rmaps_round_robin_DSO_FALSE = @OMPI_BUILD_rmaps_round_robin_DSO_FALSE@ OMPI_BUILD_rmaps_round_robin_DSO_TRUE = 
@OMPI_BUILD_rmaps_round_robin_DSO_TRUE@ OMPI_BUILD_rmgr_cnos_DSO_FALSE = @OMPI_BUILD_rmgr_cnos_DSO_FALSE@ @@ -971,6 +989,9 @@ common_portals_CPPFLAGS = @common_portals_CPPFLAGS@ common_portals_LDFLAGS = @common_portals_LDFLAGS@ common_portals_LIBS = @common_portals_LIBS@ datadir = @datadir@ +errmgr_bproc_CPPFLAGS = @errmgr_bproc_CPPFLAGS@ +errmgr_bproc_LDFLAGS = @errmgr_bproc_LDFLAGS@ +errmgr_bproc_LIBS = @errmgr_bproc_LIBS@ exec_prefix = @exec_prefix@ host = @host@ host_alias = @host_alias@ @@ -1017,21 +1038,19 @@ mtl_psm_CFLAGS = @mtl_psm_CFLAGS@ mtl_psm_CPPFLAGS = @mtl_psm_CPPFLAGS@ mtl_psm_LDFLAGS = @mtl_psm_LDFLAGS@ mtl_psm_LIBS = @mtl_psm_LIBS@ +odls_bproc_CPPFLAGS = @odls_bproc_CPPFLAGS@ +odls_bproc_LDFLAGS = @odls_bproc_LDFLAGS@ +odls_bproc_LIBS = @odls_bproc_LIBS@ oldincludedir = @oldincludedir@ pls_bproc_CPPFLAGS = @pls_bproc_CPPFLAGS@ pls_bproc_LDFLAGS = @pls_bproc_LDFLAGS@ pls_bproc_LIBS = @pls_bproc_LIBS@ -pls_bproc_orted_CPPFLAGS = @pls_bproc_orted_CPPFLAGS@ -pls_bproc_orted_LDFLAGS = @pls_bproc_orted_LDFLAGS@ -pls_bproc_orted_LIBS = @pls_bproc_orted_LIBS@ pls_slurm_CPPFLAGS = @pls_slurm_CPPFLAGS@ pls_slurm_LDFLAGS = @pls_slurm_LDFLAGS@ pls_slurm_LIBS = @pls_slurm_LIBS@ pls_tm_CPPFLAGS = @pls_tm_CPPFLAGS@ pls_tm_LDFLAGS = @pls_tm_LDFLAGS@ pls_tm_LIBS = @pls_tm_LIBS@ -pls_xgrid_LDFLAGS = @pls_xgrid_LDFLAGS@ -pls_xgrid_OBJCFLAGS = @pls_xgrid_OBJCFLAGS@ prefix = @prefix@ program_transform_name = @program_transform_name@ ras_bjs_CPPFLAGS = @ras_bjs_CPPFLAGS@ diff --git a/ompi/mca/pml/base/pml_base_select.c b/ompi/mca/pml/base/pml_base_select.c index cde4d34326..3672037853 100644 --- a/ompi/mca/pml/base/pml_base_select.c +++ b/ompi/mca/pml/base/pml_base_select.c @@ -24,7 +24,7 @@ #include "opal/runtime/opal_progress.h" #include "opal/mca/mca.h" #include "opal/mca/base/base.h" -#include "orte/runtime/runtime.h" +#include "orte/mca/errmgr/errmgr.h" #include "ompi/constants.h" #include "ompi/mca/pml/pml.h" #include "ompi/mca/pml/base/base.h" @@ -117,9 +117,9 
@@ int mca_pml_base_select(bool enable_progress_threads, if( NULL == best_component ) { opal_show_help("help-mca-base.txt", "find-available:none-found", true, "pml"); if( NULL != mca_pml_base_pml ) { - orte_abort( 1, "PML %s cannot be selected", mca_pml_base_pml ); + orte_errmgr.error_detected(1, "PML %s cannot be selected", mca_pml_base_pml, NULL); } else { - orte_abort(1, "No pml component available. This shouldn't happen."); + orte_errmgr.error_detected(2, "No pml component available. This shouldn't happen.", NULL); } } diff --git a/ompi/runtime/ompi_mpi_abort.c b/ompi/runtime/ompi_mpi_abort.c index 08762301eb..c21d2f8534 100644 --- a/ompi/runtime/ompi_mpi_abort.c +++ b/ompi/runtime/ompi_mpi_abort.c @@ -40,6 +40,8 @@ #include "orte/util/proc_info.h" #include "orte/runtime/runtime.h" #include "orte/mca/ns/ns.h" +#include "orte/mca/errmgr/errmgr.h" +#include "orte/mca/pls/pls.h" #include "orte/mca/rmgr/rmgr.h" #include "ompi/communicator/communicator.h" #include "ompi/proc/proc.h" @@ -50,6 +52,7 @@ #include #endif +#if 0 static int abort_procs(ompi_proc_t **procs, int proc_count, @@ -66,14 +69,14 @@ abort_procs(ompi_proc_t **procs, int proc_count, } if (jobid == my_jobid) continue; - killret = orte_rmgr.terminate_job(jobid); + killret = orte_pls.terminate_job(jobid); if (OMPI_SUCCESS != killret) ret = killret; } return ret; } - +#endif int ompi_mpi_abort(struct ompi_communicator_t* comm, @@ -143,7 +146,7 @@ ompi_mpi_abort(struct ompi_communicator_t* comm, } /* BWB - XXX - Should probably publish the error code somewhere */ - +#if 0 /* Kill everyone in the job. We may make this better someday to actually loop over ompi_rte_kill_proc() to only kill the procs in comm, and additionally to somehow use errorcode. 
*/ @@ -167,7 +170,7 @@ ompi_mpi_abort(struct ompi_communicator_t* comm, comm->c_local_group->grp_proc_count, my_jobid); - ret = orte_rmgr.terminate_job(my_jobid); + ret = orte_pls.terminate_job(my_jobid); if (OMPI_SUCCESS == ret) { while (1) { @@ -188,6 +191,12 @@ ompi_mpi_abort(struct ompi_communicator_t* comm, just exit and let it become Somebody Elses Problem. */ exit(errcode); } - +#endif + + /* tell the error manager we detected an error - OpenRTE + * will take care of cleaning up for us + */ + orte_errmgr.error_detected(errcode, "MPI_Abort has been called", NULL); + return OMPI_SUCCESS; } diff --git a/ompi/tools/ompi_info/components.cc b/ompi/tools/ompi_info/components.cc index 9f5b923514..bae6a35cc3 100644 --- a/ompi/tools/ompi_info/components.cc +++ b/ompi/tools/ompi_info/components.cc @@ -221,7 +221,7 @@ void ompi_info::open_components() component_map["rml"] = &orte_rml_base.rml_components; orte_pls_base_open(); - component_map["pls"] = &orte_pls_base.pls_opened; + component_map["pls"] = &orte_pls_base.available_components; orte_sds_base_open(); component_map["sds"] = &orte_sds_base_components_available; diff --git a/opal/include/opal/types.h b/opal/include/opal/types.h index a00d1ae5e6..da88314d0f 100644 --- a/opal/include/opal/types.h +++ b/opal/include/opal/types.h @@ -101,9 +101,9 @@ typedef void* ompi_iov_base_ptr_t; */ #if defined(HAVE_SOCKLEN_T) -typedef socklen_t ompi_socklen_t; +typedef socklen_t opal_socklen_t; #else -typedef int ompi_socklen_t; +typedef int opal_socklen_t; #endif diff --git a/opal/mca/paffinity/linux/paffinity_linux_module.c b/opal/mca/paffinity/linux/paffinity_linux_module.c index 8104914bf6..139fafd27e 100644 --- a/opal/mca/paffinity/linux/paffinity_linux_module.c +++ b/opal/mca/paffinity/linux/paffinity_linux_module.c @@ -122,7 +122,7 @@ static int make_mask(unsigned int *len, unsigned long **mask) linux_module_get_num_procs(&num_procs); *len = num_procs / 8; - if (*len != num_procs * 8) { + if (*len != (unsigned 
int)num_procs * 8) { ++*len; } diff --git a/orte/dss/dss_arith.c b/orte/dss/dss_arith.c index faa3c2e9b5..8703800386 100755 --- a/orte/dss/dss_arith.c +++ b/orte/dss/dss_arith.c @@ -22,11 +22,12 @@ #if HAVE_NETINET_IN_H #include #endif +#include "opal/util/output.h" #include "orte/mca/errmgr/errmgr.h" +#include "orte/mca/odls/odls_types.h" #include "orte/dss/dss_internal.h" -#include "opal/util/output.h" static void orte_dss_arith_int(int *value, int *operand, orte_dss_arith_op_t operation); static void orte_dss_arith_uint(uint *value, uint *operand, orte_dss_arith_op_t operation); diff --git a/orte/dss/dss_compare.c b/orte/dss/dss_compare.c index 1238112b68..f929cd6bfe 100755 --- a/orte/dss/dss_compare.c +++ b/orte/dss/dss_compare.c @@ -232,16 +232,6 @@ int orte_dss_compare_dt(orte_data_type_t *value1, orte_data_type_t *value2, orte return ORTE_EQUAL; } -/* ORTE_DAEMON_CMD */ -int orte_dss_compare_daemon_cmd(orte_daemon_cmd_flag_t *value1, orte_daemon_cmd_flag_t *value2, orte_data_type_t type) -{ - if (*value1 > *value2) return ORTE_VALUE1_GREATER; - - if (*value2 > *value1) return ORTE_VALUE2_GREATER; - - return ORTE_EQUAL; -} - /* ORTE_DATA_VALUE */ int orte_dss_compare_data_value(orte_data_value_t *value1, orte_data_value_t *value2, orte_data_type_t type) { diff --git a/orte/dss/dss_copy.c b/orte/dss/dss_copy.c index 609756a1f1..f30f55e4dd 100755 --- a/orte/dss/dss_copy.c +++ b/orte/dss/dss_copy.c @@ -114,10 +114,6 @@ int orte_dss_std_copy(void **dest, void *src, orte_data_type_t type) datasize = sizeof(orte_data_type_t); break; - case ORTE_DAEMON_CMD: - datasize = sizeof(orte_daemon_cmd_flag_t); - break; - default: ORTE_ERROR_LOG(ORTE_ERR_UNKNOWN_DATA_TYPE); return ORTE_ERR_UNKNOWN_DATA_TYPE; diff --git a/orte/dss/dss_internal.h b/orte/dss/dss_internal.h index 43f859024c..7e957b2db5 100644 --- a/orte/dss/dss_internal.h +++ b/orte/dss/dss_internal.h @@ -112,6 +112,55 @@ extern "C" { #error Unsupported pid_t size! 
#endif +/* Unpack generic size macros */ +#define UNPACK_SIZE_MISMATCH(unpack_type, remote_type, ret) \ +do { \ + switch(remote_type) { \ + case ORTE_UINT8: \ + UNPACK_SIZE_MISMATCH_FOUND(unpack_type, uint8_t, remote_type); \ + break; \ + case ORTE_INT8: \ + UNPACK_SIZE_MISMATCH_FOUND(unpack_type, int8_t, remote_type); \ + break; \ + case ORTE_UINT16: \ + UNPACK_SIZE_MISMATCH_FOUND(unpack_type, uint16_t, remote_type); \ + break; \ + case ORTE_INT16: \ + UNPACK_SIZE_MISMATCH_FOUND(unpack_type, int16_t, remote_type); \ + break; \ + case ORTE_UINT32: \ + UNPACK_SIZE_MISMATCH_FOUND(unpack_type, uint32_t, remote_type); \ + break; \ + case ORTE_INT32: \ + UNPACK_SIZE_MISMATCH_FOUND(unpack_type, int32_t, remote_type); \ + break; \ + case ORTE_UINT64: \ + UNPACK_SIZE_MISMATCH_FOUND(unpack_type, uint64_t, remote_type); \ + break; \ + case ORTE_INT64: \ + UNPACK_SIZE_MISMATCH_FOUND(unpack_type, int64_t, remote_type); \ + break; \ + default: \ + ret = ORTE_ERR_NOT_FOUND; \ + ORTE_ERROR_LOG(ret); \ + } \ +} while (0) + +/* NOTE: do not need to deal with endianness here, as the unpacking of +the underling sender-side type will do that for us. Repeat: the +data in tmpbuf[] is already in host byte order. 
*/ +#define UNPACK_SIZE_MISMATCH_FOUND(unpack_type, tmptype, tmpdsstype) \ +do { \ + orte_std_cntr_t i; \ + tmptype *tmpbuf = (tmptype*)malloc(sizeof(tmptype) * (*num_vals)); \ + ret = orte_dss_unpack_buffer(buffer, tmpbuf, num_vals, tmpdsstype); \ + for (i = 0 ; i < *num_vals ; ++i) { \ + ((unpack_type*) dest)[i] = (unpack_type)(tmpbuf[i]); \ + } \ + free(tmpbuf); \ +} while (0) + + /** * Internal struct used for holding registered dss functions */ @@ -256,9 +305,6 @@ extern orte_data_type_t orte_dss_num_reg_types; int orte_dss_pack_data_type(orte_buffer_t *buffer, void *src, orte_std_cntr_t num_vals, orte_data_type_t type); - int orte_dss_pack_daemon_cmd(orte_buffer_t *buffer, void *src, - orte_std_cntr_t num_vals, orte_data_type_t type); - int orte_dss_pack_data_value(orte_buffer_t *buffer, void *src, orte_std_cntr_t num_vals, orte_data_type_t type); @@ -301,9 +347,6 @@ extern orte_data_type_t orte_dss_num_reg_types; int orte_dss_unpack_data_type(orte_buffer_t *buffer, void *dest, orte_std_cntr_t *num_vals, orte_data_type_t type); - int orte_dss_unpack_daemon_cmd(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t *num_vals, orte_data_type_t type); - int orte_dss_unpack_data_value(orte_buffer_t *buffer, void *dest, orte_std_cntr_t *num_vals, orte_data_type_t type); @@ -360,8 +403,6 @@ extern orte_data_type_t orte_dss_num_reg_types; int orte_dss_compare_dt(orte_data_type_t *value1, orte_data_type_t *value2, orte_data_type_t type); - int orte_dss_compare_daemon_cmd(orte_daemon_cmd_flag_t *value1, orte_daemon_cmd_flag_t *value2, orte_data_type_t type); - int orte_dss_compare_data_value(orte_data_value_t *value1, orte_data_value_t *value2, orte_data_type_t type); int orte_dss_compare_byte_object(orte_byte_object_t *value1, orte_byte_object_t *value2, orte_data_type_t type); @@ -405,7 +446,6 @@ extern orte_data_type_t orte_dss_num_reg_types; int orte_dss_print_null(char **output, char *prefix, void *src, orte_data_type_t type); int orte_dss_print_std_cntr(char 
**output, char *prefix, orte_std_cntr_t *src, orte_data_type_t type); int orte_dss_print_data_type(char **output, char *prefix, orte_data_type_t *src, orte_data_type_t type); - int orte_dss_print_daemon_cmd(char **output, char *prefix, orte_daemon_cmd_flag_t *src, orte_data_type_t type); int orte_dss_print_data_value(char **output, char *prefix, orte_data_value_t *src, orte_data_type_t type); int orte_dss_print_byte_object(char **output, char *prefix, orte_byte_object_t *src, orte_data_type_t type); diff --git a/orte/dss/dss_open_close.c b/orte/dss/dss_open_close.c index 0a5f10c5d5..a17cb8332e 100644 --- a/orte/dss/dss_open_close.c +++ b/orte/dss/dss_open_close.c @@ -426,19 +426,6 @@ int orte_dss_open(void) ORTE_ERROR_LOG(rc); return rc; } - tmp = ORTE_DAEMON_CMD; - if (ORTE_SUCCESS != (rc = orte_dss.register_type(orte_dss_pack_daemon_cmd, - orte_dss_unpack_daemon_cmd, - (orte_dss_copy_fn_t)orte_dss_std_copy, - (orte_dss_compare_fn_t)orte_dss_compare_daemon_cmd, - (orte_dss_size_fn_t)orte_dss_std_size, - (orte_dss_print_fn_t)orte_dss_print_daemon_cmd, - (orte_dss_release_fn_t)orte_dss_std_release, - ORTE_DSS_UNSTRUCTURED, - "ORTE_DATA_TYPE", &tmp))) { - ORTE_ERROR_LOG(rc); - return rc; - } tmp = ORTE_BYTE_OBJECT; if (ORTE_SUCCESS != (rc = orte_dss.register_type(orte_dss_pack_byte_object, orte_dss_unpack_byte_object, diff --git a/orte/dss/dss_pack.c b/orte/dss/dss_pack.c index e64d3271b2..f913879b5e 100644 --- a/orte/dss/dss_pack.c +++ b/orte/dss/dss_pack.c @@ -417,22 +417,6 @@ int orte_dss_pack_data_value(orte_buffer_t *buffer, void *src, orte_std_cntr_t n return ORTE_SUCCESS; } -/* - * ORTE_DAEMON_CMD - */ -int orte_dss_pack_daemon_cmd(orte_buffer_t *buffer, void *src, orte_std_cntr_t num_vals, - orte_data_type_t type) -{ - int ret; - - /* Turn around and pack the real type */ - if (ORTE_SUCCESS != (ret = orte_dss_pack_buffer(buffer, src, num_vals, ORTE_DAEMON_CMD_T))) { - ORTE_ERROR_LOG(ret); - } - - return ret; -} - /* * ORTE_BYTE_OBJECT */ diff --git 
a/orte/dss/dss_print.c b/orte/dss/dss_print.c index 7fd2161efd..ea90540f86 100755 --- a/orte/dss/dss_print.c +++ b/orte/dss/dss_print.c @@ -475,28 +475,6 @@ int orte_dss_print_data_value(char **output, char *prefix, orte_data_value_t *sr return ORTE_SUCCESS; } -/* - * ORTE_DAEMON_CMD - */ -int orte_dss_print_daemon_cmd(char **output, char *prefix, orte_daemon_cmd_flag_t *src, orte_data_type_t type) -{ - char *prefx; - - /* deal with NULL prefix */ - if (NULL == prefix) asprintf(&prefx, " "); - else prefx = prefix; - - /* if src is NULL, just print data type and return */ - if (NULL == src) { - asprintf(output, "%sData type: ORTE_DAEMON_CMD\tValue: NULL pointer", prefx); - return ORTE_SUCCESS; - } - - asprintf(output, "%sData type: ORTE_DAEMON_CMD\tValue: %lu", prefx, (unsigned long) *src); - - return ORTE_SUCCESS; -} - /* * ORTE_BYTE_OBJECT */ diff --git a/orte/dss/dss_size.c b/orte/dss/dss_size.c index 29ee9f1b87..1d61d4b120 100755 --- a/orte/dss/dss_size.c +++ b/orte/dss/dss_size.c @@ -109,10 +109,6 @@ int orte_dss_std_size(size_t *size, void *src, orte_data_type_t type) *size = sizeof(orte_data_type_t); break; - case ORTE_DAEMON_CMD: - *size = sizeof(orte_daemon_cmd_flag_t); - break; - default: *size = 0; ORTE_ERROR_LOG(ORTE_ERR_UNKNOWN_DATA_TYPE); diff --git a/orte/dss/dss_unpack.c b/orte/dss/dss_unpack.c index 1e8f125836..2c84bb7b85 100644 --- a/orte/dss/dss_unpack.c +++ b/orte/dss/dss_unpack.c @@ -31,54 +31,6 @@ #include "orte/dss/dss_internal.h" -#define UNPACK_SIZE_MISMATCH(unpack_type, remote_type, ret) \ - do { \ - switch(remote_type) { \ - case ORTE_UINT8: \ - UNPACK_SIZE_MISMATCH_FOUND(unpack_type, uint8_t, remote_type); \ - break; \ - case ORTE_INT8: \ - UNPACK_SIZE_MISMATCH_FOUND(unpack_type, int8_t, remote_type); \ - break; \ - case ORTE_UINT16: \ - UNPACK_SIZE_MISMATCH_FOUND(unpack_type, uint16_t, remote_type); \ - break; \ - case ORTE_INT16: \ - UNPACK_SIZE_MISMATCH_FOUND(unpack_type, int16_t, remote_type); \ - break; \ - case ORTE_UINT32: \ - 
UNPACK_SIZE_MISMATCH_FOUND(unpack_type, uint32_t, remote_type); \ - break; \ - case ORTE_INT32: \ - UNPACK_SIZE_MISMATCH_FOUND(unpack_type, int32_t, remote_type); \ - break; \ - case ORTE_UINT64: \ - UNPACK_SIZE_MISMATCH_FOUND(unpack_type, uint64_t, remote_type); \ - break; \ - case ORTE_INT64: \ - UNPACK_SIZE_MISMATCH_FOUND(unpack_type, int64_t, remote_type); \ - break; \ - default: \ - ret = ORTE_ERR_NOT_FOUND; \ - ORTE_ERROR_LOG(ret); \ - } \ - } while (0) - -/* NOTE: do not need to deal with endianness here, as the unpacking of - the underling sender-side type will do that for us. Repeat: the - data in tmpbuf[] is already in host byte order. */ -#define UNPACK_SIZE_MISMATCH_FOUND(unpack_type, tmptype, tmpdsstype) \ - do { \ - orte_std_cntr_t i; \ - tmptype *tmpbuf = (tmptype*)malloc(sizeof(tmptype) * (*num_vals)); \ - ret = orte_dss_unpack_buffer(buffer, tmpbuf, num_vals, tmpdsstype); \ - for (i = 0 ; i < *num_vals ; ++i) { \ - ((unpack_type*) dest)[i] = (unpack_type)(tmpbuf[i]); \ - } \ - free(tmpbuf); \ - } while (0) - - int orte_dss_unpack(orte_buffer_t *buffer, void *dst, orte_std_cntr_t *num_vals, orte_data_type_t type) { @@ -603,49 +555,6 @@ int orte_dss_unpack_data_type(orte_buffer_t *buffer, void *dest, orte_std_cntr_t return ret; } -/* - * ORTE_DAEMON_CMD - */ -int orte_dss_unpack_daemon_cmd(orte_buffer_t *buffer, void *dest, orte_std_cntr_t *num_vals, - orte_data_type_t type) -{ - int ret; - orte_data_type_t remote_type; - - /* if the buffer is fully described, then we can do some magic to handle - * the heterogeneous case. if not, then we can only shoot blind - it is the - * user's responsibility to ensure we are in a homogeneous environment. 
- */ - if (ORTE_DSS_BUFFER_FULLY_DESC == buffer->type) { - /* see what type was actually packed */ - if (ORTE_SUCCESS != (ret = orte_dss_peek_type(buffer, &remote_type))) { - ORTE_ERROR_LOG(ret); - return ret; - } - - if (remote_type == ORTE_DAEMON_CMD_T) { - /* fast path it if the sizes are the same */ - /* Turn around and unpack the real type */ - if (ORTE_SUCCESS != (ret = orte_dss_unpack_buffer(buffer, dest, num_vals, ORTE_DAEMON_CMD_T))) { - ORTE_ERROR_LOG(ret); - } - } else { - /* slow path - types are different sizes */ - UNPACK_SIZE_MISMATCH(orte_daemon_cmd_flag_t, remote_type, ret); - } - return ret; - } - - /* if we get here, then this buffer is NOT fully described. just unpack it - * using the local size - user gets the pain if it's wrong - */ - if (ORTE_SUCCESS != (ret = orte_dss_unpack_buffer(buffer, dest, num_vals, ORTE_DAEMON_CMD_T))) { - ORTE_ERROR_LOG(ret); - } - - return ret; -} - /* * ORTE_DATA_VALUE */ diff --git a/orte/include/orte/orte_types.h b/orte/include/orte/orte_types.h index 05977d89d1..c28c0e07ad 100644 --- a/orte/include/orte/orte_types.h +++ b/orte/include/orte/orte_types.h @@ -46,10 +46,6 @@ typedef struct { uint8_t *bytes; } orte_byte_object_t; -/* define the orted command flag type */ -typedef uint16_t orte_daemon_cmd_flag_t; -#define ORTE_DAEMON_CMD_T ORTE_UINT16 - /** * handle differences in iovec */ diff --git a/orte/mca/errmgr/base/Makefile.am b/orte/mca/errmgr/base/Makefile.am index f01254bdcb..04be5aae25 100644 --- a/orte/mca/errmgr/base/Makefile.am +++ b/orte/mca/errmgr/base/Makefile.am @@ -17,10 +17,12 @@ # headers += \ + base/errmgr_private.h \ base/base.h libmca_errmgr_la_SOURCES += \ base/errmgr_base_close.c \ + base/errmgr_base_receive.c \ base/errmgr_base_select.c \ base/errmgr_base_open.c \ base/errmgr_base_fns.c diff --git a/orte/mca/errmgr/base/base.h b/orte/mca/errmgr/base/base.h index 422cdb741c..24e38627e2 100644 --- a/orte/mca/errmgr/base/base.h +++ b/orte/mca/errmgr/base/base.h @@ -48,26 +48,9 @@ extern "C" { 
* function definitions */ ORTE_DECLSPEC int orte_errmgr_base_open(void); -ORTE_DECLSPEC int orte_errmgr_base_select(bool *allow_multi_user_threads, - bool *have_hidden_threads); +ORTE_DECLSPEC int orte_errmgr_base_select(void); ORTE_DECLSPEC int orte_errmgr_base_close(void); - /* - * Base functions that are common to all implementations - can be overridden - */ - -ORTE_DECLSPEC void orte_errmgr_base_log(int error_code, char *filename, int line); - -ORTE_DECLSPEC void orte_errmgr_base_proc_aborted(orte_process_name_t *proc); - -ORTE_DECLSPEC void orte_errmgr_base_incomplete_start(orte_jobid_t job); - -ORTE_DECLSPEC void orte_errmgr_base_error_detected(int error_code); - -ORTE_DECLSPEC int orte_errmgr_base_register_job(orte_jobid_t job); - -ORTE_DECLSPEC void orte_errmgr_base_abort(void); - /* * globals that might be needed */ @@ -78,6 +61,8 @@ ORTE_DECLSPEC extern bool orte_errmgr_initialized; ORTE_DECLSPEC extern opal_list_t orte_errmgr_base_components_available; ORTE_DECLSPEC extern mca_errmgr_base_component_t orte_errmgr_base_selected_component; +/* make the default module available so that close can use it */ +ORTE_DECLSPEC extern orte_errmgr_base_module_t orte_errmgr_default; /* * external API functions will be documented in the mca/errmgr/errmgr.h file */ diff --git a/orte/mca/errmgr/base/errmgr_base_close.c b/orte/mca/errmgr/base/errmgr_base_close.c index c06f750761..bc52a3f390 100644 --- a/orte/mca/errmgr/base/errmgr_base_close.c +++ b/orte/mca/errmgr/base/errmgr_base_close.c @@ -24,6 +24,8 @@ #include "opal/util/trace.h" #include "opal/mca/mca.h" #include "opal/mca/base/base.h" + +#include "orte/mca/errmgr/errmgr.h" #include "orte/mca/errmgr/base/base.h" @@ -44,6 +46,10 @@ int orte_errmgr_base_close(void) &orte_errmgr_base_components_available, NULL); orte_errmgr_initialized = false; + + /* set the module back to the default so that error logging can continue */ + orte_errmgr = orte_errmgr_default; + /* All done */ return ORTE_SUCCESS; diff --git 
a/orte/mca/errmgr/base/errmgr_base_fns.c b/orte/mca/errmgr/base/errmgr_base_fns.c index f0e5702020..24695615d0 100644 --- a/orte/mca/errmgr/base/errmgr_base_fns.c +++ b/orte/mca/errmgr/base/errmgr_base_fns.c @@ -23,18 +23,14 @@ #endif #include #include "orte/orte_constants.h" -#include "orte/mca/schema/schema.h" -#include "orte/runtime/runtime.h" -#include "orte/runtime/orte_wait.h" #include "opal/util/output.h" #include "opal/util/trace.h" #include "orte/util/proc_info.h" -#include "orte/mca/ns/ns.h" +#include "orte/mca/ns/ns_types.h" -#include "orte/mca/rmgr/rmgr.h" - -#include "orte/mca/errmgr/base/base.h" +#include "orte/mca/errmgr/errmgr.h" +#include "orte/mca/errmgr/base/errmgr_private.h" void orte_errmgr_base_log(int error_code, char *filename, int line) @@ -49,55 +45,37 @@ void orte_errmgr_base_log(int error_code, char *filename, int line) ORTE_NAME_ARGS(orte_process_info.my_name), ORTE_ERROR_NAME(error_code), filename, line); } - /* orte_errmgr_base_error_detected(error_code); */ } -void orte_errmgr_base_proc_aborted(orte_process_name_t *proc) +int orte_errmgr_base_proc_aborted_not_avail(orte_gpr_notify_message_t *msg) { - orte_jobid_t job; - int rc; - - OPAL_TRACE(1); - - if (ORTE_SUCCESS != (rc = orte_ns.get_jobid(&job, proc))) { - ORTE_ERROR_LOG(rc); - return; - } - - orte_rmgr.terminate_job(job); + return ORTE_ERR_NOT_AVAILABLE; } -void orte_errmgr_base_incomplete_start(orte_jobid_t job) +int orte_errmgr_base_incomplete_start_not_avail(orte_gpr_notify_message_t *msgb) { - OPAL_TRACE(1); - - orte_rmgr.terminate_job(job); + return ORTE_ERR_NOT_AVAILABLE; } -void orte_errmgr_base_error_detected(int error_code) +void orte_errmgr_base_error_detected(int error_code, char *fmt, ...) 
{ - OPAL_TRACE(1); - + /* we can't know if any output is available yet, so + * we just exit */ + exit(error_code); } -void orte_errmgr_base_abort() +void orte_errmgr_base_abort(void) { - OPAL_TRACE(1); - - /* kill and reap all children */ - orte_wait_kill(9); - - /* abnormal exit */ - orte_abort(-1, NULL); + /* guess we should exit */ + exit(-1); } -int orte_errmgr_base_register_job(orte_jobid_t job) +int orte_errmgr_base_register_job_not_avail(orte_jobid_t job) { - /* register subscription for process_status values - * changing to abnormal termination codes - */ - - OPAL_TRACE(1); - - return ORTE_SUCCESS; + return ORTE_ERR_NOT_AVAILABLE; +} + +int orte_errmgr_base_abort_procs_request_not_avail(orte_process_name_t *procs, orte_std_cntr_t num_procs) +{ + return ORTE_ERR_NOT_AVAILABLE; } diff --git a/orte/mca/errmgr/base/errmgr_base_open.c b/orte/mca/errmgr/base/errmgr_base_open.c index d924b957e3..1443029655 100644 --- a/orte/mca/errmgr/base/errmgr_base_open.c +++ b/orte/mca/errmgr/base/errmgr_base_open.c @@ -27,6 +27,7 @@ #include "opal/util/trace.h" #include "orte/mca/errmgr/base/base.h" +#include "orte/mca/errmgr/base/errmgr_private.h" /* @@ -45,14 +46,22 @@ * Global variables */ int orte_errmgr_base_output = -1; -orte_errmgr_base_module_t orte_errmgr = { +/* + * we must define a default module so that the error logging + * functions can be available as early as possible + */ +orte_errmgr_base_module_t orte_errmgr_default = { orte_errmgr_base_log, - orte_errmgr_base_proc_aborted, - orte_errmgr_base_incomplete_start, + orte_errmgr_base_proc_aborted_not_avail, + orte_errmgr_base_incomplete_start_not_avail, orte_errmgr_base_error_detected, - orte_errmgr_base_register_job, - orte_errmgr_base_abort + orte_errmgr_base_register_job_not_avail, + orte_errmgr_base_abort, + orte_errmgr_base_abort_procs_request_not_avail }; +/* start out with a default module */ +orte_errmgr_base_module_t orte_errmgr; + bool orte_errmgr_base_selected = false; opal_list_t 
orte_errmgr_base_components_available; mca_errmgr_base_component_t orte_errmgr_base_selected_component; @@ -81,6 +90,9 @@ int orte_errmgr_base_open(void) orte_errmgr_base_output = -1; } + /* set the default module */ + orte_errmgr = orte_errmgr_default; + /* Open up all available components */ if (ORTE_SUCCESS != diff --git a/orte/mca/errmgr/base/errmgr_base_receive.c b/orte/mca/errmgr/base/errmgr_base_receive.c new file mode 100644 index 0000000000..6f48a14a16 --- /dev/null +++ b/orte/mca/errmgr/base/errmgr_base_receive.c @@ -0,0 +1,162 @@ +/* -*- C -*- + * + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +/** @file: + * + */ + +/* + * includes + */ +#include "orte_config.h" + +#include "orte/orte_constants.h" +#include "orte/orte_types.h" + +#include "opal/util/output.h" +#include "opal/mca/mca.h" +#include "opal/mca/base/mca_base_param.h" + +#include "orte/dss/dss.h" +#include "orte/util/proc_info.h" +#include "orte/mca/errmgr/errmgr.h" +#include "orte/mca/rml/rml.h" + +#include "orte/mca/errmgr/base/errmgr_private.h" + +static bool recv_issued=false; + +int orte_errmgr_base_comm_start(void) +{ + int rc; + + if (recv_issued) { + return ORTE_SUCCESS; + } + + if (ORTE_SUCCESS != (rc = orte_rml.recv_buffer_nb(ORTE_RML_NAME_ANY, + ORTE_RML_TAG_ERRMGR, + ORTE_RML_PERSISTENT, + orte_errmgr_base_recv, + NULL))) { + ORTE_ERROR_LOG(rc); + } + recv_issued = true; + + return rc; +} + +int orte_errmgr_base_comm_stop(void) +{ + int rc; + + if (!recv_issued) { + return ORTE_SUCCESS; + } + + if (ORTE_SUCCESS != (rc = orte_rml.recv_cancel(ORTE_RML_NAME_ANY, ORTE_RML_TAG_ERRMGR))) { + ORTE_ERROR_LOG(rc); + } + recv_issued = false; + + return rc; +} + + + +/* + * handle message from proxies + * NOTE: The incoming buffer "buffer" is OBJ_RELEASED by the calling program. 
+ * DO NOT RELEASE THIS BUFFER IN THIS CODE + */ + +void orte_errmgr_base_recv(int status, orte_process_name_t* sender, + orte_buffer_t* buffer, orte_rml_tag_t tag, + void* cbdata) +{ + orte_buffer_t answer; + orte_errmgr_cmd_flag_t command; + orte_std_cntr_t count, nprocs; + orte_process_name_t *procs; + int rc; + + /* get the command */ + count = 1; + if (ORTE_SUCCESS != (rc = orte_dss.unpack(buffer, &command, &count, ORTE_ERRMGR_CMD))) { + ORTE_ERROR_LOG(rc); + return; + } + + /* setup to return an answer */ + OBJ_CONSTRUCT(&answer, orte_buffer_t); + + /* pack the command in the answer - this is done to allow the caller to check + * that we are talking about the same command + */ + if (ORTE_SUCCESS != (rc = orte_dss.pack(&answer, &command, 1, ORTE_ERRMGR_CMD))) { + ORTE_ERROR_LOG(rc); + return; + } + + switch (command) { + case ORTE_ERRMGR_ABORT_PROCS_REQUEST_CMD: + /* get the number of processes */ + count = 1; + if (ORTE_SUCCESS != (rc = orte_dss.unpack(buffer, &nprocs, &count, ORTE_STD_CNTR))) { + ORTE_ERROR_LOG(rc); + goto SEND_ANSWER; + } + /* get the required space */ + procs = (orte_process_name_t*)malloc(nprocs * sizeof(orte_process_name_t)); + if (NULL == procs) { + ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); + goto SEND_ANSWER; + } + + /* unpack the array of process names */ + count = nprocs; + if (ORTE_SUCCESS != (rc = orte_dss.unpack(buffer, &procs, &count, ORTE_NAME))) { + ORTE_ERROR_LOG(rc); + goto SEND_ANSWER; + } + /* if we didn't get the number we requested, then something is wrong */ + if (count != nprocs) { + ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); + goto SEND_ANSWER; + } + + /* process the request */ + if (ORTE_SUCCESS != (rc = orte_errmgr.abort_procs_request(procs, nprocs))) { + ORTE_ERROR_LOG(rc); + goto SEND_ANSWER; + } + break; + + default: + ORTE_ERROR_LOG(ORTE_ERR_VALUE_OUT_OF_BOUNDS); + } + +SEND_ANSWER: + if (0 > orte_rml.send_buffer(sender, &answer, tag, 0)) { + ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); + } + + /* cleanup */ + 
OBJ_DESTRUCT(&answer); +} + diff --git a/orte/mca/errmgr/base/errmgr_base_select.c b/orte/mca/errmgr/base/errmgr_base_select.c index 041b4f8542..d808fbed78 100644 --- a/orte/mca/errmgr/base/errmgr_base_select.c +++ b/orte/mca/errmgr/base/errmgr_base_select.c @@ -29,8 +29,7 @@ * Function for selecting one component from all those that are * available. */ -int orte_errmgr_base_select(bool *allow_multi_user_threads, - bool *have_hidden_threads) +int orte_errmgr_base_select(void) { opal_list_item_t *item; mca_base_component_list_item_t *cli; @@ -71,11 +70,9 @@ int orte_errmgr_base_select(bool *allow_multi_user_threads, best_module = module; best_component = component; - *allow_multi_user_threads = multi; - *have_hidden_threads = hidden; - /* update the best priority */ - best_priority = priority; + /* update the best priority */ + best_priority = priority; } /* If it's not the best one, finalize it */ @@ -86,10 +83,10 @@ int orte_errmgr_base_select(bool *allow_multi_user_threads, } } - /* If we didn't find one to select, that's okay - stick with default */ + /* If we didn't find one to select, then we have a big problem */ if (NULL == best_component) { - return ORTE_SUCCESS; + return ORTE_ERROR; } /* We have happiness -- save the component and module for later @@ -98,7 +95,7 @@ int orte_errmgr_base_select(bool *allow_multi_user_threads, orte_errmgr = *best_module; orte_errmgr_base_selected_component = *best_component; orte_errmgr_base_selected = true; - + /* all done */ return ORTE_SUCCESS; diff --git a/orte/mca/errmgr/base/errmgr_private.h b/orte/mca/errmgr/base/errmgr_private.h new file mode 100644 index 0000000000..93f8cb495b --- /dev/null +++ b/orte/mca/errmgr/base/errmgr_private.h @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. 
All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +/** @file: + */ + +#ifndef ORTE_MCA_ERRMGR_PRIVATE_H +#define ORTE_MCA_ERRMGR_PRIVATE_H + +/* + * includes + */ +#include "orte_config.h" +#include "orte/orte_constants.h" + +#include "orte/mca/ns/ns_types.h" +#include "orte/mca/gpr/gpr_types.h" +#include "orte/mca/rml/rml_types.h" + + +/* + * Functions for use solely within the ERRMGR framework + */ +#if defined(c_plusplus) || defined(__cplusplus) +extern "C" { +#endif + +/* Define the ERRMGR command flag */ +typedef uint8_t orte_errmgr_cmd_flag_t; +#define ORTE_ERRMGR_CMD ORTE_UINT8 + +/* define some commands */ +#define ORTE_ERRMGR_ABORT_PROCS_REQUEST_CMD 0x01 + +/* Internal support */ +int orte_errmgr_base_comm_start(void); +int orte_errmgr_base_comm_stop(void); +void orte_errmgr_base_recv(int status, orte_process_name_t* sender, + orte_buffer_t* buffer, orte_rml_tag_t tag, + void* cbdata); + + +/* + * Base functions + */ + +ORTE_DECLSPEC void orte_errmgr_base_log(int error_code, char *filename, int line); + +ORTE_DECLSPEC int orte_errmgr_base_proc_aborted_not_avail(orte_gpr_notify_message_t *msg); + +ORTE_DECLSPEC int orte_errmgr_base_incomplete_start_not_avail(orte_gpr_notify_message_t *msg); + +ORTE_DECLSPEC void orte_errmgr_base_error_detected(int error_code, char *fmt, ...); + +ORTE_DECLSPEC int orte_errmgr_base_register_job_not_avail(orte_jobid_t job); + +ORTE_DECLSPEC void orte_errmgr_base_abort(void); + +ORTE_DECLSPEC int orte_errmgr_base_abort_procs_request_not_avail(orte_process_name_t *procs, orte_std_cntr_t num_procs); + +/* + * external API functions will be documented in the mca/errmgr/errmgr.h file + */ + +#if defined(c_plusplus) || defined(__cplusplus) +} +#endif +#endif diff 
--git a/orte/mca/errmgr/bproc/Makefile.am b/orte/mca/errmgr/bproc/Makefile.am new file mode 100644 index 0000000000..76d885f3f6 --- /dev/null +++ b/orte/mca/errmgr/bproc/Makefile.am @@ -0,0 +1,51 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +AM_CPPFLAGS = -I$(top_ompi_builddir)/src/include $(errmgr_bproc_CPPFLAGS) + +# Make the output library in this directory, and name it either +# mca__.la (for DSO builds) or libmca__.la +# (for static builds). 
+ +if OMPI_BUILD_errmgr_bproc_DSO +component_noinst = +component_install = mca_errmgr_bproc.la +else +component_noinst = libmca_errmgr_bproc.la +component_install = +endif + +sources = \ + errmgr_bproc.h \ + errmgr_bproc.c \ + errmgr_bproc_component.c + +mcacomponentdir = $(libdir)/openmpi +mcacomponent_LTLIBRARIES = $(component_install) +mca_errmgr_bproc_la_SOURCES = $(sources) +mca_errmgr_bproc_la_LIBADD = \ + $(errmgr_bproc_LIBS) \ + $(top_ompi_builddir)/orte/liborte.la \ + $(top_ompi_builddir)/opal/libopal.la +mca_errmgr_bproc_la_LDFLAGS = -module -avoid-version $(errmgr_bproc_LDFLAGS) + +noinst_LTLIBRARIES = $(component_noinst) +libmca_errmgr_bproc_la_SOURCES = $(sources) +libmca_errmgr_bproc_la_LIBADD = $(errmgr_bproc_LIBS) +libmca_errmgr_bproc_la_LDFLAGS = -module -avoid-version $(errmgr_bproc_LDFLAGS) diff --git a/orte/mca/pls/bproc_orted/configure.m4 b/orte/mca/errmgr/bproc/configure.m4 similarity index 62% rename from orte/mca/pls/bproc_orted/configure.m4 rename to orte/mca/errmgr/bproc/configure.m4 index b22682f661..5fc4f86287 100644 --- a/orte/mca/pls/bproc_orted/configure.m4 +++ b/orte/mca/errmgr/bproc/configure.m4 @@ -17,22 +17,22 @@ # $HEADER$ # -# MCA_pls_bproc_orted_CONFIG([action-if-found], [action-if-not-found]) +# MCA_errmgr_bproc_CONFIG([action-if-found], [action-if-not-found]) # ----------------------------------------------------------- -AC_DEFUN([MCA_pls_bproc_orted_CONFIG],[ - OMPI_CHECK_BPROC([pls_bproc_orted], [pls_bproc_orted_good=1], - [pls_bproc_orted_good=1], [pls_bproc_orted_good=0]) +AC_DEFUN([MCA_errmgr_bproc_CONFIG],[ + OMPI_CHECK_BPROC([errmgr_bproc], [errmgr_bproc_good=1], + [errmgr_bproc_good=1], [errmgr_bproc_good=0]) # if check worked, set wrapper flags if so. 
# Evaluate succeed / fail - AS_IF([test "$pls_bproc_orted_good" = "1"], - [pls_bproc_orted_WRAPPER_EXTRA_LDFLAGS="$pls_bproc_orted_LDFLAGS" - pls_bproc_orted_WRAPPER_EXTRA_LIBS="$pls_bproc_orted_LIBS" + AS_IF([test "$errmgr_bproc_good" = "1"], + [errmgr_bproc_WRAPPER_EXTRA_LDFLAGS="$errmgr_bproc_LDFLAGS" + errmgr_bproc_WRAPPER_EXTRA_LIBS="$errmgr_bproc_LIBS" $1], [$2]) # set build flags to use in makefile - AC_SUBST([pls_bproc_orted_CPPFLAGS]) - AC_SUBST([pls_bproc_orted_LDFLAGS]) - AC_SUBST([pls_bproc_orted_LIBS]) + AC_SUBST([errmgr_bproc_CPPFLAGS]) + AC_SUBST([errmgr_bproc_LDFLAGS]) + AC_SUBST([errmgr_bproc_LIBS]) ])dnl diff --git a/orte/mca/errmgr/bproc/configure.params b/orte/mca/errmgr/bproc/configure.params new file mode 100644 index 0000000000..dfebb2cc50 --- /dev/null +++ b/orte/mca/errmgr/bproc/configure.params @@ -0,0 +1,23 @@ +# -*- shell-script -*- +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# Specific to this module + +PARAM_INIT_FILE=errmgr_bproc_component.c +PARAM_CONFIG_FILES="Makefile" diff --git a/orte/mca/errmgr/bproc/errmgr_bproc.c b/orte/mca/errmgr/bproc/errmgr_bproc.c new file mode 100644 index 0000000000..cf1f182dc6 --- /dev/null +++ b/orte/mca/errmgr/bproc/errmgr_bproc.c @@ -0,0 +1,223 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. 
+ * Copyright (c) 2004-2005 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation. All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+
+#include "orte_config.h"
+#include "orte/orte_constants.h"
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "opal/util/output.h"
+
+#include "orte/runtime/runtime.h"
+#include "orte/mca/ns/ns_types.h"
+#include "orte/mca/gpr/gpr_types.h"
+#include "orte/mca/rml/rml.h"
+
+#include "orte/mca/errmgr/base/errmgr_private.h"
+#include "orte/mca/errmgr/bproc/errmgr_bproc.h"
+
+/*
+ * This function gets called when the SMR updates a process state to
+ * indicate that it aborted. Since the bproc component is only active on
+ * non-HNP processes, this function will NEVER be called
+ *
+ * msg is ignored. Always returns ORTE_ERR_NOT_AVAILABLE.
+ */
+int orte_errmgr_bproc_proc_aborted(orte_gpr_notify_message_t *msg)
+{
+    return ORTE_ERR_NOT_AVAILABLE;
+}
+
+/*
+ * This function gets called when the SMR updates a process state to
+ * indicate that it failed to start. Since the bproc component is only active on
+ * non-HNP processes, this function will NEVER be called
+ *
+ * msg is ignored. Always returns ORTE_ERR_NOT_AVAILABLE.
+ */
+int orte_errmgr_bproc_incomplete_start(orte_gpr_notify_message_t *msg)
+{
+    return ORTE_ERR_NOT_AVAILABLE;
+}
+
+/*
+ * This function gets called when a process detects an internal error.
+ * Bproc is unusually bad about letting us pass information that we
+ * aborted as opposed to normally terminated. There is no way to locally
+ * monitor the process state on a remote node, so the only thing we
+ * can do is pass the info back to the Bproc PLS on the HNP and let it
+ * figure out what to do.
+ *
+ * error_code is handed to orte_abort() once the alert has been sent.
+ * fmt is an optional printf-style message (may be NULL) that is expanded
+ * with the trailing arguments and written with opal_output().
+ *
+ * NOTE(review): if the alert buffer cannot be allocated, packed or sent,
+ * this function logs the error and returns to the caller WITHOUT calling
+ * orte_abort() - confirm that this is the intended behavior.
+ */
+void orte_errmgr_bproc_error_detected(int error_code, char *fmt, ...)
+{
+    orte_buffer_t *cmd;
+    uint8_t command;
+    int rc;
+
+    /* If there was a message, output it */
+    if (NULL != fmt) {
+        va_list arglist;
+        char *buffer = NULL;
+
+        va_start(arglist, fmt);
+        /* the buffer contents are only defined when vasprintf succeeds */
+        if (0 <= vasprintf(&buffer, fmt, arglist) && NULL != buffer) {
+            /* the expanded text is data, not a format string: any '%'
+             * left in it must not be interpreted a second time
+             */
+            opal_output(0, "%s", buffer);
+            free(buffer);
+        }
+        va_end(arglist);
+    }
+
+    /* Now prepare and send a message to the BProc PLS so it knows that
+     * we abnormally terminated. It doesn't matter what is in the
+     * message - the fact that it gets received is adequate
+     */
+    command = 0x01;
+
+    cmd = OBJ_NEW(orte_buffer_t);
+    if (cmd == NULL) {
+        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
+        return;
+    }
+
+    /* just pack something */
+    if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &command, 1, ORTE_UINT8))) {
+        ORTE_ERROR_LOG(rc);
+        OBJ_RELEASE(cmd);
+        return;
+    }
+
+    /* send the alert */
+    if (0 > orte_rml.send_buffer(orte_errmgr_bproc_globals.replica, cmd, ORTE_RML_TAG_BPROC_ABORT, 0)) {
+        ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
+        OBJ_RELEASE(cmd);
+        return;
+    }
+    OBJ_RELEASE(cmd);
+
+    /* okay, now we can truly abort. Tell the abort function not to bother writing out
+     * an abort file - we can't do anything with it anyway!
+     */
+    orte_abort(error_code, false);
+}
+
+/*
+ * This function gets called when a process desperately needs to just die.
+ * Nothing can be done by definition here - this function ONLY gets
+ * called as an absolute last resort.
+ */
+void orte_errmgr_bproc_abort(void)
+{
+    /* abnormal exit - no point in writing out an abort file as bproc doesn't
+     * know what to do with it anyway
+     */
+    orte_abort(-1, false);
+}
+
+/*
+ * Alternatively, some systems (e.g., OpenMPI) need to tell us to kill
+ * some other subset of processes along with us. Send that info to the
+ * HNP so it can kill them.
+ *
+ * NOTE: this function assumes that the underlying ORTE infrastructure is
+ * still operational. Use of this function should therefore be restricted
+ * to cases where the problem is in a higher layer (e.g., MPI) as the
+ * process is likely to "hang" if an ORTE problem has been encountered.
+ *
+ * procs is the array of process names to abort (must not be NULL) and
+ * nprocs is the number of entries in it.
+ *
+ * Returns ORTE_SUCCESS once an answer carrying the same command has been
+ * received; ORTE_ERR_BAD_PARAM if procs is NULL; ORTE_ERR_OUT_OF_RESOURCE
+ * if a buffer cannot be created; the pack/unpack error code if the DSS
+ * fails; ORTE_ERR_COMM_FAILURE if the send or receive fails or the answer
+ * carries a different command.
+ */
+int orte_errmgr_bproc_abort_procs_request(orte_process_name_t *procs, orte_std_cntr_t nprocs)
+{
+    orte_buffer_t *cmd;
+    orte_buffer_t *answer;
+    orte_errmgr_cmd_flag_t command;
+    orte_std_cntr_t count;
+    int rc;
+
+    /* protect us against error */
+    if (NULL == procs) {
+        return ORTE_ERR_BAD_PARAM;
+    }
+
+    command = ORTE_ERRMGR_ABORT_PROCS_REQUEST_CMD;
+
+    cmd = OBJ_NEW(orte_buffer_t);
+    if (cmd == NULL) {
+        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
+        return ORTE_ERR_OUT_OF_RESOURCE;
+    }
+
+    /* pack the command */
+    if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &command, 1, ORTE_ERRMGR_CMD))) {
+        ORTE_ERROR_LOG(rc);
+        OBJ_RELEASE(cmd);
+        return rc;
+    }
+
+    /* pack the number of procs we are requesting be aborted */
+    if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &nprocs, 1, ORTE_STD_CNTR))) {
+        ORTE_ERROR_LOG(rc);
+        OBJ_RELEASE(cmd);
+        return rc;
+    }
+
+    /* pack the array of proc names */
+    if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, procs, nprocs, ORTE_NAME))) {
+        ORTE_ERROR_LOG(rc);
+        OBJ_RELEASE(cmd);
+        return rc;
+    }
+
+    /* send the request
+     * NOTE(review): both the request and the answer travel on
+     * ORTE_RML_TAG_RDS rather than an errmgr-specific tag - confirm that
+     * the HNP side listens for errmgr commands on this tag.
+     */
+    if (0 > orte_rml.send_buffer(orte_errmgr_bproc_globals.replica, cmd, ORTE_RML_TAG_RDS, 0)) {
+        ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
+        OBJ_RELEASE(cmd);
+        return ORTE_ERR_COMM_FAILURE;
+    }
+    OBJ_RELEASE(cmd);
+
+    /* setup a buffer for the answer */
+    answer = OBJ_NEW(orte_buffer_t);
+    if (answer == NULL) {
+        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
+        return ORTE_ERR_OUT_OF_RESOURCE;
+    }
+
+    /* enter a blocking receive until we hear back */
+    if (0 > orte_rml.recv_buffer(orte_errmgr_bproc_globals.replica, answer, ORTE_RML_TAG_RDS)) {
+        ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
+        OBJ_RELEASE(answer);
+        return ORTE_ERR_COMM_FAILURE;
+    }
+
+    count = 1;
+    if (ORTE_SUCCESS != (rc = orte_dss.unpack(answer, &command, &count, ORTE_ERRMGR_CMD))) {
+        ORTE_ERROR_LOG(rc);
+        OBJ_RELEASE(answer);
+        return rc;
+    }
+
+    /* check that this is the right command */
+    if (ORTE_ERRMGR_ABORT_PROCS_REQUEST_CMD != command) {
+        ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
+        OBJ_RELEASE(answer);
+        return ORTE_ERR_COMM_FAILURE;
+    }
+
+    /* clean up and leave */
+    OBJ_RELEASE(answer);
+    return ORTE_SUCCESS;
+}
+
+/*
+ * It is imperative that ONLY an HNP perform this registration!
+ * This component therefore does nothing with job and simply
+ * returns ORTE_SUCCESS.
+ */
+int orte_errmgr_bproc_register_job(orte_jobid_t job)
+{
+    return ORTE_SUCCESS;
+}
diff --git a/orte/mca/errmgr/bproc/errmgr_bproc.h b/orte/mca/errmgr/bproc/errmgr_bproc.h
new file mode 100644
index 0000000000..e47056cc27
--- /dev/null
+++ b/orte/mca/errmgr/bproc/errmgr_bproc.h
@@ -0,0 +1,81 @@
+/* -*- C -*-
+ *
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation. All rights reserved.
+ * Copyright (c) 2004-2005 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation. All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ *
+ */
+#ifndef ORTE_ERRMGR_BPROC_H
+#define ORTE_ERRMGR_BPROC_H
+
+
+#include "orte_config.h"
+#include "orte/orte_types.h"
+
+#include "orte/mca/ns/ns_types.h"
+#include "orte/mca/gpr/gpr_types.h"
+
+#include "orte/mca/errmgr/errmgr.h"
+
+#if defined(c_plusplus) || defined(__cplusplus)
+extern "C" {
+#endif
+
+/*
+ * Module open / close
+ *
+ * open registers and reads the "errmgr" / "bproc" / "debug" MCA
+ * parameter; both functions return ORTE_SUCCESS.
+ */
+int orte_errmgr_bproc_open(void);
+int orte_errmgr_bproc_close(void);
+
+
+/*
+ * Startup / Shutdown
+ *
+ * component_init returns NULL on an HNP or an orted (this component
+ * declines to be selected there); otherwise it sets *priority to 100,
+ * reports no thread support through the two bool outputs and returns
+ * the module.
+ */
+orte_errmgr_base_module_t*
+orte_errmgr_bproc_component_init(bool *allow_multi_user_threads, bool *have_hidden_threads, int *priority);
+
+int orte_errmgr_bproc_finalize(void);
+
+/*
+ * globals used within the component
+ */
+typedef struct {
+    /* non-zero turns on the component's debug output; set in open */
+    int debug;
+    /* process that receives this component's RML messages; set in
+     * component_init to the name service replica
+     */
+    orte_process_name_t *replica;
+} orte_errmgr_bproc_globals_t;
+
+
+extern orte_errmgr_bproc_globals_t orte_errmgr_bproc_globals;
+
+/*
+ * Component API functions
+ */
+/* never expected to be called here; returns ORTE_ERR_NOT_AVAILABLE */
+int orte_errmgr_bproc_proc_aborted(orte_gpr_notify_message_t *msg);
+
+/* never expected to be called here; returns ORTE_ERR_NOT_AVAILABLE */
+int orte_errmgr_bproc_incomplete_start(orte_gpr_notify_message_t *msg);
+
+/* prints the optional printf-style message, alerts the replica, then aborts */
+void orte_errmgr_bproc_error_detected(int error_code, char *fmt, ...);
+
+/* last-resort exit: calls orte_abort(-1, false) */
+void orte_errmgr_bproc_abort(void);
+
+/* no-op on this component; returns ORTE_SUCCESS */
+int orte_errmgr_bproc_register_job(orte_jobid_t job);
+
+/* asks the replica to abort the nprocs processes named in procs and
+ * blocks until an answer arrives
+ */
+int orte_errmgr_bproc_abort_procs_request(orte_process_name_t *procs, orte_std_cntr_t nprocs);
+
+#if defined(c_plusplus) || defined(__cplusplus)
+}
+#endif
+
+#endif
diff --git a/orte/mca/errmgr/bproc/errmgr_bproc_component.c b/orte/mca/errmgr/bproc/errmgr_bproc_component.c
new file mode 100644
index 0000000000..eaecf7ec65
--- /dev/null
+++ b/orte/mca/errmgr/bproc/errmgr_bproc_component.c
@@ -0,0 +1,164 @@
+/* -*- C -*-
+ *
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation. All rights reserved.
+ * Copyright (c) 2004-2005 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation. All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+/** @file:
+ *
+ * The OpenRTE Error Manager - BProc component
+ *
+ */
+
+/*
+ * includes
+ */
+#include "orte_config.h"
+
+#include "orte/orte_constants.h"
+#include "orte/orte_types.h"
+
+#include "opal/util/output.h"
+#include "opal/mca/mca.h"
+#include "opal/mca/base/base.h"
+#include "opal/mca/base/mca_base_param.h"
+
+#include "orte/util/proc_info.h"
+#include "orte/mca/ns/ns_types.h"
+
+#include "orte/mca/errmgr/errmgr.h"
+#include "orte/mca/errmgr/base/errmgr_private.h"
+
+#include "errmgr_bproc.h"
+
+
+/*
+ * Struct of function pointers that need to be initialized
+ */
+mca_errmgr_base_component_t mca_errmgr_bproc_component = {
+    {
+        ORTE_ERRMGR_BASE_VERSION_1_3_0,
+
+        "bproc", /* MCA module name */
+        ORTE_MAJOR_VERSION,  /* MCA module major version */
+        ORTE_MINOR_VERSION,  /* MCA module minor version */
+        ORTE_RELEASE_VERSION,  /* MCA module release version */
+        orte_errmgr_bproc_open,  /* module open */
+        orte_errmgr_bproc_close /* module close */
+    },
+    {
+        false /* checkpoint / restart */
+    },
+    orte_errmgr_bproc_component_init,    /* module init */
+    orte_errmgr_bproc_finalize /* module shutdown */
+};
+
+/*
+ * setup the function pointers for the module
+ */
+static orte_errmgr_base_module_t orte_errmgr_bproc = {
+    orte_errmgr_base_log,
+    orte_errmgr_bproc_proc_aborted,
+    orte_errmgr_bproc_incomplete_start,
+    orte_errmgr_bproc_error_detected,
+    orte_errmgr_bproc_register_job,
+    orte_errmgr_bproc_abort,
+    orte_errmgr_bproc_abort_procs_request
+};
+
+
+/*
+ * Whether or not we allowed this component to be selected
+ */
+static bool initialized = false;
+
+/* local globals */
+orte_errmgr_bproc_globals_t orte_errmgr_bproc_globals;
+
+/*
+ * Open the component
+ *
+ * Registers the "errmgr" / "bproc" / "debug" MCA parameter (default 0)
+ * and records whether it is non-zero in orte_errmgr_bproc_globals.debug.
+ * Always returns ORTE_SUCCESS.
+ */
+int orte_errmgr_bproc_open(void)
+{
+    int id;
+    /* start from the registered default so that debug stays off if the
+     * lookup does not fill in a value
+     */
+    int tmp = 0;
+
+    id = mca_base_param_register_int("errmgr", "bproc", "debug", NULL, 0);
+    mca_base_param_lookup_int(id, &tmp);
+    orte_errmgr_bproc_globals.debug = (0 != tmp) ? true : false;
+
+    return ORTE_SUCCESS;
+}
+
+/*
+ * Close the component - nothing to release, always returns ORTE_SUCCESS
+ */
+int orte_errmgr_bproc_close(void)
+{
+    return ORTE_SUCCESS;
+}
+
+/*
+ * Decide whether this component may be selected and, if so, hand back
+ * its module.
+ *
+ * Returns NULL (leaving the output parameters untouched) on an HNP or
+ * an orted. Otherwise sets *priority to 100, sets both thread flags to
+ * false, points the component's replica at the name service replica and
+ * returns the module.
+ */
+orte_errmgr_base_module_t*
+orte_errmgr_bproc_component_init(bool *allow_multi_user_threads, bool *have_hidden_threads,
+                                 int *priority)
+{
+    if (orte_errmgr_bproc_globals.debug) {
+        opal_output(0, "errmgr_bproc_init called");
+    }
+
+    /* If we are an HNP or an orted, then don't pick us! */
+    if (orte_process_info.seed || orte_process_info.daemon) {
+        /* don't take me! */
+        return NULL;
+    }
+
+    /* Return a module (choose an arbitrary, positive priority --
+     * absolutely must be higher than the proxy component)
+     */
+
+    *priority = 100;
+
+    /* no part of OpenRTE allows or has threads */
+
+    *allow_multi_user_threads = false;
+    *have_hidden_threads = false;
+
+    /* define the replica for us to use - for now, just point
+     * to the name service replica
+     */
+    orte_errmgr_bproc_globals.replica = orte_process_info.ns_replica;
+
+    initialized = true;
+    return &orte_errmgr_bproc;
+}
+
+/*
+ * finalize routine - clears the selection flag, always returns ORTE_SUCCESS
+ */
+int orte_errmgr_bproc_finalize(void)
+{
+    if (orte_errmgr_bproc_globals.debug) {
+        opal_output(0, "[%lu,%lu,%lu] errmgr_bproc_finalize called",
+                    ORTE_NAME_ARGS(orte_process_info.my_name));
+    }
+
+    initialized = false;
+
+    /* All done */
+    return ORTE_SUCCESS;
+}
diff --git a/orte/mca/errmgr/errmgr.h b/orte/mca/errmgr/errmgr.h index 3207683537..4c9f6be642 100644 --- a/orte/mca/errmgr/errmgr.h +++ b/orte/mca/errmgr/errmgr.h @@ -30,7 +30,11 @@ #include "orte_config.h" #include "orte/orte_constants.h" + + #include "orte/mca/schema/schema.h" +#include "orte/mca/gpr/gpr_types.h" +#include "orte/mca/ns/ns_types.h" #include "opal/mca/mca.h" @@ -58,8 +62,7 @@
extern "C" { /** * Log an error - * Log an error that occurred in the runtime environment, and call the "error_detected" - * interface to see if further action is required. + * Log an error that occurred in the runtime environment * * @code * orte_errmgr.log("this is an error", __FILE__, __LINE__); @@ -70,70 +73,110 @@ typedef void (*orte_errmgr_base_module_log_fn_t)(int error_code, char *filename, /** * Alert - process aborted - * This function is called when a remote process aborts during execution. Note that local - * process errors should always be reported through the error_detected interface and - * NOT here. The function is called when a message is received from the universe daemon - * indicating that another process in the job failed. For now, this function will - * simply cause the local process to gracefully finalize and terminate. + * This function is called when a remote process aborts during execution. The function + * is called via the GPR's trigger notification system. Actions taken in response + * to the abnormal termination of a remote application process will vary across + * the various errmgr components. + + * NOTE: Local process errors should always be reported through the error_detected interface and + * NOT here. */ -typedef void (*orte_errmgr_base_module_proc_aborted_fn_t)(orte_process_name_t *proc); +typedef int (*orte_errmgr_base_module_proc_aborted_fn_t)(orte_gpr_notify_message_t *msg); /** * Alert - incomplete start of a job * This function is called when an attempted launch of a job encounters failure of - * one or more processes to start. The function decides on the strategy for dealing - * with this "incomplete start" situation - for now, it simply orders the resource - * manager to terminate the entire job. + * one or more processes to start. The strategy for dealing + * with this "incomplete start" situation varies across the various errmgr components. 
* * This function is only called by the respective process launcher, which is responsible - * for detecting incomplete starts. + * for detecting incomplete starts. If on a daemon, the function simply updates the + * process state to indicate failure to launch - this initiates a trigger that goes to + * the respective HNP for response. + * + * NOTE: Errmgr components on non-HNP and non-daemon processes are expressly forbidden + * from taking any action to this function call. Instead, they are restricted to simply + * returning. */ -typedef void (*orte_errmgr_base_module_incomplete_start_fn_t)(orte_jobid_t job); +typedef int (*orte_errmgr_base_module_incomplete_start_fn_t)(orte_gpr_notify_message_t *msg); /** * Alert - internal error detected - * This function is called when an internal error is detected within the local process. - * It decides what to do about the error - for now, it simply orders the local process - * to finalize and terminate. + * This function is called when an internal error is detected within a local process. + * It decides what to do about the error. In the case of application processes, it simply + * orders the local process to finalize and terminate. The abnormal termination will be + * detected and dealt with by the daemon/HNP system. + * + * HNPs, of course, cannot simply exit - they must first cleanup their running jobs if at + * all possible. In some cases, this cannot be done - e.g., if the error detected would + * prevent operation of the registry or has corrupted memory. In these extreme cases, + * nothing can really be done. + * + * Likewise, orteds have responsibility towards their local application processes and + * must make some attempt to clean them up before exiting. + * + * The function pretty prints an error message if possible. Error message should be + * specified using the standard \code printf() format. 
*/ -typedef void (*orte_errmgr_base_module_error_detected_fn_t)(int error_code); +typedef void (*orte_errmgr_base_module_error_detected_fn_t)(int error_code, char *fmt, ...); /* * Register a job with the error manager * When a job is launched, this function is called so the error manager can register * subscriptions on the job segment so that the error manager will be notified when * problems occur - i.e., when process status entries change to abnormal termination - * values. Process status entries are changed by the appropriate state-of-health monitor + * values. Process status entries are changed by the appropriate state monitor * and/or the process launcher, depending upon the stage at which the problem occurs. * * Monitoring of the job begins once the job has reached the "executing" stage. Prior * to that time, failure of processes to start are the responsibility of the respective * process launcher - which is expected to call the error manager via the "incomplete * start" interface to report any problems prior to the job beginning "execution". + * + * NOTE: ONLY HNPs are allowed to register for trigger reports. All other components + * MUST do nothing but return ORTE_SUCCESS. */ typedef int (*orte_errmgr_base_module_register_job_fn_t)(orte_jobid_t job); /** * Alert - self aborting - * This function is called when a process is aborting. The routine will kill - * any child processes and terminate the calling process. + * This function is called when a process is aborting. It will finalize the process + * itself, and then exits - it takes no other actions. The intent here is to provide + * a last-ditch exit procedure that attempts to clean up a little. */ typedef void (*orte_errmgr_base_module_abort_fn_t)(void); +/* + * Request that the system abort processes other than myself + * The possibility exists that a process will decide that ONLY a small subset of a job + * must be aborted. 
This function allows a process to request that the identified + * processes be aborted. The "request" portion of the function's name is not + * by accident - this function specifically does NOT perform the abort process + * itself, but simply requests that it be done. + * + * NOTE: Please ensure that you do NOT include your own process name in the + * array or else you will be ordered to "die" before you complete this function + * (i.e., you will be held in a blocking receive pending an answer from the + * HNP, which won't come before you receive your own "die" command). If you need + * to die too, then call "abort" after completing this function call. + */ +typedef int (*orte_errmgr_base_module_abort_procs_request_fn_t)(orte_process_name_t *procs, orte_std_cntr_t num_procs); + /* * Ver 1.0.0 */ -struct orte_errmgr_base_module_1_0_0_t { - orte_errmgr_base_module_log_fn_t log; - orte_errmgr_base_module_proc_aborted_fn_t proc_aborted; - orte_errmgr_base_module_incomplete_start_fn_t incomplete_start; - orte_errmgr_base_module_error_detected_fn_t error_detected; - orte_errmgr_base_module_register_job_fn_t register_job; - orte_errmgr_base_module_abort_fn_t abort; +struct orte_errmgr_base_module_1_3_0_t { + orte_errmgr_base_module_log_fn_t log; + orte_errmgr_base_module_proc_aborted_fn_t proc_aborted; + orte_errmgr_base_module_incomplete_start_fn_t incomplete_start; + orte_errmgr_base_module_error_detected_fn_t error_detected; + orte_errmgr_base_module_register_job_fn_t register_job; + orte_errmgr_base_module_abort_fn_t abort; + orte_errmgr_base_module_abort_procs_request_fn_t abort_procs_request; }; -typedef struct orte_errmgr_base_module_1_0_0_t orte_errmgr_base_module_1_0_0_t; -typedef orte_errmgr_base_module_1_0_0_t orte_errmgr_base_module_t; +typedef struct orte_errmgr_base_module_1_3_0_t orte_errmgr_base_module_1_3_0_t; +typedef orte_errmgr_base_module_1_3_0_t orte_errmgr_base_module_t; /* * ERRMGR Component @@ -150,26 +193,26 @@ typedef int 
(*orte_errmgr_base_component_finalize_fn_t)(void); * the standard component data structure */ -struct mca_errmgr_base_component_1_0_0_t { +struct mca_errmgr_base_component_1_3_0_t { mca_base_component_t errmgr_version; mca_base_component_data_1_0_0_t errmgr_data; orte_errmgr_base_component_init_fn_t errmgr_init; orte_errmgr_base_component_finalize_fn_t errmgr_finalize; }; -typedef struct mca_errmgr_base_component_1_0_0_t mca_errmgr_base_component_1_0_0_t; -typedef mca_errmgr_base_component_1_0_0_t mca_errmgr_base_component_t; +typedef struct mca_errmgr_base_component_1_3_0_t mca_errmgr_base_component_1_3_0_t; +typedef mca_errmgr_base_component_1_3_0_t mca_errmgr_base_component_t; /* * Macro for use in components that are of type errmgr v1.0.0 */ -#define ORTE_ERRMGR_BASE_VERSION_1_0_0 \ - /* ns v1.0 is chained to MCA v1.0 */ \ +#define ORTE_ERRMGR_BASE_VERSION_1_3_0 \ + /* errmgr v1.3 is chained to MCA v1.0 */ \ MCA_BASE_VERSION_1_0_0, \ - /* errmgr v1.0 */ \ - "errmgr", 1, 0, 0 + /* errmgr v1.3 */ \ + "errmgr", 1, 3, 0 /* Global structure for accessing error manager functions */ diff --git a/orte/mca/errmgr/hnp/Makefile.am b/orte/mca/errmgr/hnp/Makefile.am new file mode 100644 index 0000000000..4f4b5c0c97 --- /dev/null +++ b/orte/mca/errmgr/hnp/Makefile.am @@ -0,0 +1,46 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +sources = \ + errmgr_hnp.h \ + errmgr_hnp_component.c \ + errmgr_hnp.c + +# Make the output library in this directory, and name it either +# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la +# (for static builds). + +if OMPI_BUILD_errmgr_hnp_DSO +component_noinst = +component_install = mca_errmgr_hnp.la +else +component_noinst = libmca_errmgr_hnp.la +component_install = +endif + +mcacomponentdir = $(libdir)/openmpi +mcacomponent_LTLIBRARIES = $(component_install) +mca_errmgr_hnp_la_SOURCES = $(sources) +mca_errmgr_hnp_la_LDFLAGS = -module -avoid-version +mca_errmgr_hnp_la_LIBADD = \ + $(top_ompi_builddir)/orte/liborte.la \ + $(top_ompi_builddir)/opal/libopal.la + +noinst_LTLIBRARIES = $(component_noinst) +libmca_errmgr_hnp_la_SOURCES =$(sources) +libmca_errmgr_hnp_la_LDFLAGS = -module -avoid-version diff --git a/orte/mca/errmgr/hnp/configure.params b/orte/mca/errmgr/hnp/configure.params new file mode 100644 index 0000000000..bec31b8d4a --- /dev/null +++ b/orte/mca/errmgr/hnp/configure.params @@ -0,0 +1,23 @@ +# -*- shell-script -*- +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved.
+# $COPYRIGHT$
+#
+# Additional copyrights may follow
+#
+# $HEADER$
+#
+
+# Specific to this module
+
+PARAM_INIT_FILE=errmgr_hnp_component.c
+PARAM_CONFIG_FILES="Makefile"
diff --git a/orte/mca/errmgr/hnp/errmgr_hnp.c b/orte/mca/errmgr/hnp/errmgr_hnp.c
new file mode 100644
index 0000000000..9ba651f89c
--- /dev/null
+++ b/orte/mca/errmgr/hnp/errmgr_hnp.c
@@ -0,0 +1,205 @@
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation. All rights reserved.
+ * Copyright (c) 2004-2005 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation. All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+
+#include "orte_config.h"
+#include "orte/orte_constants.h"
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "opal/util/trace.h"
+#include "opal/util/output.h"
+
+#include "orte/runtime/runtime.h"
+#include "orte/mca/ns/ns_types.h"
+#include "orte/mca/gpr/gpr.h"
+#include "orte/mca/pls/pls.h"
+#include "orte/mca/smr/smr.h"
+
+#include "orte/mca/errmgr/base/base.h"
+#include "orte/mca/errmgr/hnp/errmgr_hnp.h"
+
+/*
+ * This function gets called when someone updates a process
+ * state to indicate it has aborted. That action results in
+ * the firing of a registry trigger that passes a minimal
+ * data message here. The only part of that message we need
+ * is the trigger name (msg->target) so we can extract the jobid from it
+ *
+ * Various components will follow their own strategy for dealing with
+ * this situation. For this component, we simply kill the job.
+ *
+ * Returns the error code of the first step that fails (jobid
+ * extraction, job state update, job termination), ORTE_SUCCESS otherwise.
+ */
+int orte_errmgr_hnp_proc_aborted(orte_gpr_notify_message_t *msg)
+{
+    orte_jobid_t job;
+    int rc;
+
+    OPAL_TRACE(1);
+
+    opal_output(orte_errmgr_base_output, "errmgr:hnp: proc abort has been detected");
+
+    /* This trigger is named, so we can extract the jobid
+     * directly from the trigger name
+     */
+    if (ORTE_SUCCESS != (rc = orte_schema.extract_jobid_from_std_trigger_name(&job, msg->target))) {
+        ORTE_ERROR_LOG(rc);
+        return rc;
+    }
+
+    /* set the job state */
+    if (ORTE_SUCCESS != (rc = orte_smr.set_job_state(job, ORTE_JOB_STATE_ABORTED))) {
+        ORTE_ERROR_LOG(rc);
+        return rc;
+    }
+
+    /* tell the pls to terminate the job */
+    if (ORTE_SUCCESS != (rc = orte_pls.terminate_job(job))) {
+        ORTE_ERROR_LOG(rc);
+    }
+
+    return rc;
+}
+
+/*
+ * This function gets called when someone updates a process
+ * state to indicate it failed to start. That action results in
+ * the firing of a registry trigger that passes a minimal
+ * data message here. The only part of that message we need
+ * is the trigger name (msg->target) so we can extract the jobid from it
+ *
+ * Various components will follow their own strategy for dealing with
+ * this situation. For this component, we simply kill the job.
+ *
+ * Returns the error code of the first step that fails (jobid
+ * extraction, job state update, job termination), ORTE_SUCCESS otherwise.
+ */
+int orte_errmgr_hnp_incomplete_start(orte_gpr_notify_message_t *msg)
+{
+    orte_jobid_t job;
+    int rc;
+
+    OPAL_TRACE(1);
+
+    /* This trigger is named, so we can extract the jobid
+     * directly from the trigger name
+     */
+    if (ORTE_SUCCESS != (rc = orte_schema.extract_jobid_from_std_trigger_name(&job, msg->target))) {
+        ORTE_ERROR_LOG(rc);
+        return rc;
+    }
+
+    /* set the job state */
+    if (ORTE_SUCCESS != (rc = orte_smr.set_job_state(job, ORTE_JOB_STATE_FAILED_TO_START))) {
+        ORTE_ERROR_LOG(rc);
+        return rc;
+    }
+
+    /* tell the pls to terminate the job */
+    if (ORTE_SUCCESS != (rc = orte_pls.terminate_job(job))) {
+        ORTE_ERROR_LOG(rc);
+    }
+
+    return rc;
+}
+
+/*
+ * This function gets called when the HNP itself detects an internal error!
+ * Ideally, we would find some way to tell all the active jobs to die before
+ * we depart ourselves. Unfortunately, at this time, we aren't sure we can do
+ * this - later, we'll add some more intelligence by, for example, checking
+ * the error code to see if it's something that would allow us to alert
+ * the remote orteds.
+ *
+ * For now, we'll just depart!
+ *
+ * error_code is handed to orte_abort(). fmt is an optional printf-style
+ * message (may be NULL) that is expanded with the trailing arguments and
+ * written with opal_output() first.
+ */
+void orte_errmgr_hnp_error_detected(int error_code, char *fmt, ...)
+{
+    /* If there was a message, output it */
+    if (NULL != fmt) {
+        va_list arglist;
+        char *buffer = NULL;
+
+        va_start(arglist, fmt);
+        /* the buffer contents are only defined when vasprintf succeeds */
+        if (0 <= vasprintf(&buffer, fmt, arglist) && NULL != buffer) {
+            /* the expanded text is data, not a format string: any '%'
+             * left in it must not be interpreted a second time
+             */
+            opal_output(0, "%s", buffer);
+            free(buffer);
+        }
+        va_end(arglist);
+    }
+
+    /* abnormal exit */
+    orte_abort(error_code, false);
+}
+
+/*
+ * This function gets called when the HNP desperately needs to just die.
+ * Nothing can be done by definition here - this function ONLY gets
+ * called as an absolute last resort
+ */
+void orte_errmgr_hnp_abort(void)
+{
+    OPAL_TRACE(1);
+
+    /* abnormal exit */
+    orte_abort(-1, false);
+}
+
+/*
+ * This function gets called when a process wants to request that the HNP
+ * abort some set of processes for it. Since this component IS for the HNP,
+ * that means we need to actually execute this request! Call upon the PLS
+ * as needed to execute the abort requests
+ *
+ * NOTE(review): the body is currently a stub - procs and nprocs are
+ * ignored, nothing is aborted and ORTE_SUCCESS is returned.
+ */
+int orte_errmgr_hnp_abort_procs_request(orte_process_name_t *procs, orte_std_cntr_t nprocs)
+{
+    int rc;
+
+    rc = ORTE_SUCCESS;
+    return rc;
+}
+
+/*
+ * Register the HNP's errmgr functions to be called when the job encounters
+ * certain pre-identified problem states.
+ *
+ * NOTE: It is imperative that ONLY the HNP perform this registration!
+ *
+ * Returns ORTE_SUCCESS when both alert monitors were defined, otherwise
+ * the error code of the definition that failed.
+ */
+int orte_errmgr_hnp_register_job(orte_jobid_t job)
+{
+    /* we need to setup two counters and their corresponding triggers - one
+     * to alert us when something fails to launch, and another for when
+     * someone aborts
+     */
+    int rc;
+
+    /* define the ABORT trigger to fire when any process aborts */
+    if (ORTE_SUCCESS != (rc = orte_smr.define_alert_monitor(job, ORTE_NUM_ABORTED_TRIGGER,
+                                    ORTE_PROC_NUM_ABORTED, 0, 1, true,
+                                    orte_errmgr_hnp_proc_aborted, NULL))) {
+        ORTE_ERROR_LOG(rc);
+        return rc;
+    }
+
+    /* define the FAILED_LAUNCH trigger to fire when the launch fails */
+    if (ORTE_SUCCESS != (rc = orte_smr.define_alert_monitor(job, ORTE_FAILED_TO_START_TRIGGER,
+                                    ORTE_PROC_NUM_FAILED_START, 0, 1, true,
+                                    orte_errmgr_hnp_incomplete_start, NULL))) {
+        ORTE_ERROR_LOG(rc);
+        return rc;
+    }
+
+    return ORTE_SUCCESS;
+}
diff --git a/orte/mca/errmgr/hnp/errmgr_hnp.h b/orte/mca/errmgr/hnp/errmgr_hnp.h
new file mode 100644
index 0000000000..bfa7399a31
--- /dev/null
+++ b/orte/mca/errmgr/hnp/errmgr_hnp.h
@@ -0,0 +1,79 @@
+/* -*- C -*-
+ *
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation. All rights reserved.
+ * Copyright (c) 2004-2005 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation. All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ *
+ */
+#ifndef ORTE_ERRMGR_HNP_H
+#define ORTE_ERRMGR_HNP_H
+
+#include "orte_config.h"
+#include "orte/orte_types.h"
+
+#include "orte/mca/ns/ns_types.h"
+#include "orte/mca/gpr/gpr_types.h"
+
+#include "orte/mca/errmgr/errmgr.h"
+
+#if defined(c_plusplus) || defined(__cplusplus)
+extern "C" {
+#endif
+
+/*
+ * Component open / close (installed in the mca_errmgr_hnp_component descriptor)
+ */
+int orte_errmgr_hnp_open(void);
+int orte_errmgr_hnp_close(void);
+
+
+/*
+ * Startup / Shutdown: component_init returns the module, or NULL to decline
+ */
+orte_errmgr_base_module_t*
+orte_errmgr_hnp_component_init(bool *allow_multi_user_threads, bool *have_hidden_threads, int *priority);
+
+int orte_errmgr_hnp_finalize(void);
+
+/*
+ * globals used within the component (debug is set by orte_errmgr_hnp_open)
+ */
+typedef struct {
+    int debug;
+} orte_errmgr_hnp_globals_t;
+
+
+extern orte_errmgr_hnp_globals_t orte_errmgr_hnp_globals;
+
+/*
+ * Module API functions (the entries of this component's errmgr module table)
+ */
+int orte_errmgr_hnp_proc_aborted(orte_gpr_notify_message_t *msg);
+
+int orte_errmgr_hnp_incomplete_start(orte_gpr_notify_message_t *msg);
+
+void orte_errmgr_hnp_error_detected(int error_code, char *fmt, ...);
+
+void orte_errmgr_hnp_abort(void);
+
+int orte_errmgr_hnp_register_job(orte_jobid_t job);
+
+int orte_errmgr_hnp_abort_procs_request(orte_process_name_t *procs, orte_std_cntr_t nprocs);
+
+#if defined(c_plusplus) || defined(__cplusplus)
+}
+#endif
+
+#endif
diff --git a/orte/mca/errmgr/hnp/errmgr_hnp_component.c b/orte/mca/errmgr/hnp/errmgr_hnp_component.c
new file mode 100644
index 0000000000..3b3b11cdd8
--- /dev/null
+++ b/orte/mca/errmgr/hnp/errmgr_hnp_component.c
@@ -0,0 +1,174 @@
+/* -*- C -*-
+ *
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation. All rights reserved.
+ * Copyright (c) 2004-2005 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation. All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart.
All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+/** @file:
+ *
+ * The OpenRTE Error Manager (errmgr) - HNP component
+ *
+ */
+
+/*
+ * includes
+ */
+#include "orte_config.h"
+
+#include "orte/orte_constants.h"
+#include "orte/orte_types.h"
+
+#include "opal/util/output.h"
+#include "opal/mca/mca.h"
+#include "opal/mca/base/base.h"
+#include "opal/mca/base/mca_base_param.h"
+
+#include "orte/util/proc_info.h"
+#include "orte/mca/ns/ns_types.h"
+
+#include "orte/mca/errmgr/errmgr.h"
+#include "orte/mca/errmgr/base/errmgr_private.h"
+
+#include "errmgr_hnp.h"
+
+
+/*
+ * Struct of function pointers that need to be initialized
+ */
+mca_errmgr_base_component_t mca_errmgr_hnp_component = {
+    {
+        ORTE_ERRMGR_BASE_VERSION_1_3_0,
+
+        "hnp", /* MCA module name */
+        ORTE_MAJOR_VERSION, /* MCA module major version */
+        ORTE_MINOR_VERSION, /* MCA module minor version */
+        ORTE_RELEASE_VERSION, /* MCA module release version */
+        orte_errmgr_hnp_open, /* module open */
+        orte_errmgr_hnp_close /* module close */
+    },
+    {
+        false /* checkpoint / restart */
+    },
+    orte_errmgr_hnp_component_init, /* module init */
+    orte_errmgr_hnp_finalize /* module shutdown */
+};
+
+/*
+ * setup the function pointers for the module
+ */
+static orte_errmgr_base_module_t orte_errmgr_hnp = {
+    orte_errmgr_base_log,
+    orte_errmgr_hnp_proc_aborted,
+    orte_errmgr_hnp_incomplete_start,
+    orte_errmgr_hnp_error_detected,
+    orte_errmgr_hnp_register_job,
+    orte_errmgr_hnp_abort,
+    orte_errmgr_hnp_abort_procs_request
+};
+
+
+/*
+ * Whether or not we allowed this component to be selected
+ */
+static bool initialized = false;
+
+/* local globals */
+orte_errmgr_hnp_globals_t orte_errmgr_hnp_globals;
+
+
+/*
+ * Open the component: register and read the "debug" parameter
+ */
+int orte_errmgr_hnp_open(void)
+{
+    int param_index;
+    int value;
+
+    param_index = mca_base_param_register_int("errmgr", "hnp", "debug", NULL, 0);
+    mca_base_param_lookup_int(param_index, &value);
+
+    /* any non-zero setting turns debugging on */
+    orte_errmgr_hnp_globals.debug = (0 != value) ? true : false;
+
+    /* opening always reports success */
+    return ORTE_SUCCESS;
+}
+
+/*
+ * Close the component
+ */
+int orte_errmgr_hnp_close(void)
+{
+    return ORTE_SUCCESS;
+}
+
+/*
+ * Component init: hand back the HNP module, or NULL to decline selection
+ */
+orte_errmgr_base_module_t*
+orte_errmgr_hnp_component_init(bool *allow_multi_user_threads, bool *have_hidden_threads,
+                               int *priority)
+{
+    int status;
+
+    if (orte_errmgr_hnp_globals.debug) {
+        opal_output(0, "errmgr_hnp_init called");
+    }
+
+    /* If we are not an HNP, then don't pick us! */
+    if (!orte_process_info.seed) {
+        return NULL;
+    }
+
+    /* Report an arbitrary, positive priority (it's only relevant compared
+     * to other components) and no thread support: no part of OpenRTE
+     * allows or has threads */
+    *priority = 10;
+    *allow_multi_user_threads = false;
+    *have_hidden_threads = false;
+
+    /* start the receive function - decline selection if that fails */
+    status = orte_errmgr_base_comm_start();
+    if (ORTE_SUCCESS != status) {
+        ORTE_ERROR_LOG(status);
+        return NULL;
+    }
+
+    initialized = true;
+    return &orte_errmgr_hnp;
+}
+
+/*
+ * finalize routine
+ */
+int orte_errmgr_hnp_finalize(void)
+{
+    int status;
+
+    if (orte_errmgr_hnp_globals.debug) {
+        opal_output(0, "[%lu,%lu,%lu] errmgr_hnp_finalize called",
+                    ORTE_NAME_ARGS(orte_process_info.my_name));
+    }
+
+    /* stop the receive function; a failure is logged but does not
+     * change the value returned below */
+    status = orte_errmgr_base_comm_stop();
+    if (ORTE_SUCCESS != status) {
+        ORTE_ERROR_LOG(status);
+    }
+
+    initialized = false;
+    return ORTE_SUCCESS;
+}
diff --git a/orte/mca/errmgr/orted/Makefile.am b/orte/mca/errmgr/orted/Makefile.am
new file mode 100644
index 0000000000..aedfd357b8
--- /dev/null
+++ b/orte/mca/errmgr/orted/Makefile.am
@@ -0,0 +1,46 @@
+#
+# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+#                         University Research and Technology
+#                         Corporation. All rights reserved.
+# Copyright (c) 2004-2005 The University of Tennessee and The University
+#                         of Tennessee Research Foundation.
All rights
+#                         reserved.
+# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+#                         University of Stuttgart. All rights reserved.
+# Copyright (c) 2004-2005 The Regents of the University of California.
+#                         All rights reserved.
+# $COPYRIGHT$
+#
+# Additional copyrights may follow
+#
+# $HEADER$
+#
+
+sources = \
+    errmgr_orted.h \
+    errmgr_orted_component.c \
+    errmgr_orted.c
+
+# Make the output library in this directory, and name it either
+# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
+# (for static builds).
+
+if OMPI_BUILD_errmgr_orted_DSO
+component_noinst =
+component_install = mca_errmgr_orted.la
+else
+component_noinst = libmca_errmgr_orted.la
+component_install =
+endif
+
+mcacomponentdir = $(libdir)/openmpi
+mcacomponent_LTLIBRARIES = $(component_install)
+mca_errmgr_orted_la_SOURCES = $(sources)
+mca_errmgr_orted_la_LDFLAGS = -module -avoid-version
+mca_errmgr_orted_la_LIBADD = \
+    $(top_ompi_builddir)/orte/liborte.la \
+    $(top_ompi_builddir)/opal/libopal.la
+
+noinst_LTLIBRARIES = $(component_noinst)
+libmca_errmgr_orted_la_SOURCES =$(sources)
+libmca_errmgr_orted_la_LDFLAGS = -module -avoid-version
diff --git a/orte/mca/errmgr/orted/configure.params b/orte/mca/errmgr/orted/configure.params
new file mode 100644
index 0000000000..3be23d4b25
--- /dev/null
+++ b/orte/mca/errmgr/orted/configure.params
@@ -0,0 +1,23 @@
+# -*- shell-script -*-
+#
+# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+#                         University Research and Technology
+#                         Corporation. All rights reserved.
+# Copyright (c) 2004-2005 The University of Tennessee and The University
+#                         of Tennessee Research Foundation. All rights
+#                         reserved.
+# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+#                         University of Stuttgart. All rights reserved.
+# Copyright (c) 2004-2005 The Regents of the University of California.
+#                         All rights reserved.
+# $COPYRIGHT$
+#
+# Additional copyrights may follow
+#
+# $HEADER$
+#
+
+# Specific to this module
+
+PARAM_INIT_FILE=errmgr_orted_component.c
+PARAM_CONFIG_FILES="Makefile"
diff --git a/orte/mca/errmgr/orted/errmgr_orted.c b/orte/mca/errmgr/orted/errmgr_orted.c
new file mode 100644
index 0000000000..929e63f79a
--- /dev/null
+++ b/orte/mca/errmgr/orted/errmgr_orted.c
@@ -0,0 +1,192 @@
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation. All rights reserved.
+ * Copyright (c) 2004-2005 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation. All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+
+#include "orte_config.h"
+#include "orte/orte_constants.h"
+
+#include <stdlib.h>
+#include <stdarg.h>
+
+#include "opal/util/output.h"
+
+#include "orte/runtime/runtime.h"
+#include "orte/runtime/orte_wait.h"
+#include "orte/util/proc_info.h"
+#include "orte/util/session_dir.h"
+#include "orte/mca/ns/ns_types.h"
+#include "orte/mca/smr/smr.h"
+#include "orte/mca/rml/rml.h"
+
+#include "orte/mca/errmgr/base/errmgr_private.h"
+#include "orte/mca/errmgr/orted/errmgr_orted.h"
+
+/*
+ * This function only gets called on HNP components! Orteds learn about
+ * a proc aborting from the HNP, so here it just reports ORTE_ERR_NOT_AVAILABLE.
+ */
+int orte_errmgr_orted_proc_aborted(orte_gpr_notify_message_t *msg)
+{
+    return ORTE_ERR_NOT_AVAILABLE;
+}
+
+/* This function only gets called on HNP components! Orteds learn about
+ * an incomplete start from the HNP, so here it just reports ORTE_ERR_NOT_AVAILABLE.
+ */
+int orte_errmgr_orted_incomplete_start(orte_gpr_notify_message_t *msg)
+{
+    return ORTE_ERR_NOT_AVAILABLE;
+}
+
+/*
+ * This function gets called when the orted itself detects an internal error!
+ * At some point in future, to be polite, we tell any of our own local
+ * processes to die before we abandon them
+ */
+void orte_errmgr_orted_error_detected(int error_code, char *fmt, ...)
+{
+    va_list arglist;
+
+    /* If there was a message, expand it and print the result via a literal
+     * "%s" ('%' in the text is never re-parsed); on failure print fmt raw */
+    va_start(arglist, fmt);
+    if( NULL != fmt ) {
+        char* buffer = NULL;
+        if( 0 > vasprintf( &buffer, fmt, arglist ) ) { buffer = NULL; }
+        opal_output( 0, "%s", (NULL != buffer) ? buffer : fmt );
+        free( buffer );
+    }
+    va_end(arglist);
+
+    /* cleanup my session directory */
+    orte_session_dir_finalize(orte_process_info.my_name);
+
+    /* abnormal exit */
+    orte_abort(error_code, false);
+}
+
+/*
+ * This function gets called when we desperately need to just die.
+ * Nothing can be done by definition here - this function ONLY gets
+ * called as an absolute last resort
+ */
+void orte_errmgr_orted_abort(void)
+{
+    /* cleanup my session directory */
+    orte_session_dir_finalize(orte_process_info.my_name);
+
+    /* abnormal exit */
+    orte_abort(-1, false);
+}
+
+/*
+ * This function is called by the orted to request that some set of processes
+ * be aborted by the HNP. This would likely be an unusual request as the orted
+ * would have no knowledge of other processes or real reason to order them killed.
+ * Still, the capability is provided here.
+ */
+int orte_errmgr_orted_abort_procs_request(orte_process_name_t *procs, orte_std_cntr_t nprocs)
+{
+    orte_buffer_t* cmd;
+    orte_buffer_t* answer;
+    orte_errmgr_cmd_flag_t command;
+    orte_std_cntr_t count;
+    int rc;
+
+    /* protect us against error - a NULL proc array is rejected up front */
+    if (NULL == procs) {
+        return ORTE_ERR_BAD_PARAM;
+    }
+
+    command = ORTE_ERRMGR_ABORT_PROCS_REQUEST_CMD;
+
+    cmd = OBJ_NEW(orte_buffer_t);
+    if (cmd == NULL) {
+        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
+        return ORTE_ERR_OUT_OF_RESOURCE;
+    }
+
+    /* pack the command (from here on, every failure path releases cmd) */
+    if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &command, 1, ORTE_ERRMGR_CMD))) {
+        ORTE_ERROR_LOG(rc);
+        OBJ_RELEASE(cmd);
+        return rc;
+    }
+
+    /* pack the number of procs we are requesting be aborted */
+    if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &nprocs, 1, ORTE_STD_CNTR))) {
+        ORTE_ERROR_LOG(rc);
+        OBJ_RELEASE(cmd);
+        return rc;
+    }
+
+    /* pack the array of proc names */
+    if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, procs, nprocs, ORTE_NAME))) {
+        ORTE_ERROR_LOG(rc);
+        OBJ_RELEASE(cmd);
+        return rc;
+    }
+
+    /* send the request. NOTE(review): it travels on ORTE_RML_TAG_RDS - confirm the HNP's errmgr receive listens on that tag */
+    if (0 > orte_rml.send_buffer(orte_errmgr_orted_globals.replica, cmd, ORTE_RML_TAG_RDS, 0)) {
+        ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
+        OBJ_RELEASE(cmd);
+        return ORTE_ERR_COMM_FAILURE;
+    }
+    OBJ_RELEASE(cmd);
+
+    /* setup a buffer for the answer (cmd has already been released) */
+    answer = OBJ_NEW(orte_buffer_t);
+    if(answer == NULL) {
+        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
+        return ORTE_ERR_OUT_OF_RESOURCE;
+    }
+
+    /* enter a blocking receive until we hear back */
+    if (0 > orte_rml.recv_buffer(orte_errmgr_orted_globals.replica, answer, ORTE_RML_TAG_RDS)) {
+        ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
+        OBJ_RELEASE(answer);
+        return ORTE_ERR_COMM_FAILURE;
+    }
+
+    count = 1;
+    if (ORTE_SUCCESS != (rc = orte_dss.unpack(answer, &command, &count, ORTE_ERRMGR_CMD))) {
+        ORTE_ERROR_LOG(rc);
+        OBJ_RELEASE(answer);
+        return rc;
+    }
+
+    /* check that this is the right command - the answer must carry the flag we sent */
+    if (ORTE_ERRMGR_ABORT_PROCS_REQUEST_CMD != command) {
+        ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
+        OBJ_RELEASE(answer);
+        return ORTE_ERR_COMM_FAILURE;
+    }
+
+    /* clean up and leave */
+    OBJ_RELEASE(answer);
+    return ORTE_SUCCESS;
+}
+
+/*
+ * It is imperative that ONLY an HNP perform this registration! The orted refuses with ORTE_ERR_NOT_AVAILABLE.
+ */
+int orte_errmgr_orted_register_job(orte_jobid_t job)
+{
+    return ORTE_ERR_NOT_AVAILABLE;
+}
diff --git a/orte/mca/errmgr/orted/errmgr_orted.h b/orte/mca/errmgr/orted/errmgr_orted.h
new file mode 100644
index 0000000000..709fd762bd
--- /dev/null
+++ b/orte/mca/errmgr/orted/errmgr_orted.h
@@ -0,0 +1,81 @@
+/* -*- C -*-
+ *
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation. All rights reserved.
+ * Copyright (c) 2004-2005 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation. All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ *
+ */
+#ifndef ORTE_ERRMGR_ORTED_H
+#define ORTE_ERRMGR_ORTED_H
+
+
+#include "orte_config.h"
+#include "orte/orte_types.h"
+
+#include "orte/mca/ns/ns_types.h"
+#include "orte/mca/gpr/gpr_types.h"
+
+#include "orte/mca/errmgr/errmgr.h"
+
+#if defined(c_plusplus) || defined(__cplusplus)
+extern "C" {
+#endif
+
+/*
+ * Component open / close (installed in the mca_errmgr_orted_component descriptor)
+ */
+int orte_errmgr_orted_open(void);
+int orte_errmgr_orted_close(void);
+
+
+/*
+ * Startup / Shutdown: component_init returns the module, or NULL to decline
+ */
+orte_errmgr_base_module_t*
+orte_errmgr_orted_component_init(bool *allow_multi_user_threads, bool *have_hidden_threads, int *priority);
+
+int orte_errmgr_orted_finalize(void);
+
+/*
+ * globals used within the component: debug flag, and the peer ("replica") that abort requests are exchanged with
+ */
+typedef struct {
+    int debug;
+    orte_process_name_t *replica;
+} orte_errmgr_orted_globals_t;
+
+
+extern orte_errmgr_orted_globals_t orte_errmgr_orted_globals;
+
+/*
+ * Module API functions (the entries of this component's errmgr module table)
+ */
+int orte_errmgr_orted_proc_aborted(orte_gpr_notify_message_t *msg);
+
+int orte_errmgr_orted_incomplete_start(orte_gpr_notify_message_t *msg);
+
+void orte_errmgr_orted_error_detected(int error_code, char *fmt, ...);
+
+void orte_errmgr_orted_abort(void);
+
+int orte_errmgr_orted_register_job(orte_jobid_t job);
+
+int orte_errmgr_orted_abort_procs_request(orte_process_name_t *procs, orte_std_cntr_t nprocs);
+
+#if defined(c_plusplus) || defined(__cplusplus)
+}
+#endif
+
+#endif
diff --git a/orte/mca/errmgr/orted/errmgr_orted_component.c b/orte/mca/errmgr/orted/errmgr_orted_component.c
new file mode 100644
index 0000000000..1afc2ad27d
--- /dev/null
+++ b/orte/mca/errmgr/orted/errmgr_orted_component.c
@@ -0,0 +1,164 @@
+/* -*- C -*-
+ *
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation. All rights reserved.
+ * Copyright (c) 2004-2005 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation. All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+/** @file:
+ *
+ * The OpenRTE Error Manager (errmgr) - orted component
+ *
+ */
+
+/*
+ * includes
+ */
+#include "orte_config.h"
+
+#include "orte/orte_constants.h"
+#include "orte/orte_types.h"
+
+#include "opal/util/output.h"
+#include "opal/mca/mca.h"
+#include "opal/mca/base/base.h"
+#include "opal/mca/base/mca_base_param.h"
+
+#include "orte/util/proc_info.h"
+#include "orte/mca/ns/ns_types.h"
+
+#include "orte/mca/errmgr/errmgr.h"
+#include "orte/mca/errmgr/base/errmgr_private.h"
+
+#include "errmgr_orted.h"
+
+
+/*
+ * Struct of function pointers that need to be initialized
+ */
+mca_errmgr_base_component_t mca_errmgr_orted_component = {
+    {
+        ORTE_ERRMGR_BASE_VERSION_1_3_0,
+
+        "orted", /* MCA module name */
+        ORTE_MAJOR_VERSION, /* MCA module major version */
+        ORTE_MINOR_VERSION, /* MCA module minor version */
+        ORTE_RELEASE_VERSION, /* MCA module release version */
+        orte_errmgr_orted_open, /* module open */
+        orte_errmgr_orted_close /* module close */
+    },
+    {
+        false /* checkpoint / restart */
+    },
+    orte_errmgr_orted_component_init, /* module init */
+    orte_errmgr_orted_finalize /* module shutdown */
+};
+
+/*
+ * setup the function pointers for the module
+ */
+static orte_errmgr_base_module_t orte_errmgr_orted = {
+    orte_errmgr_base_log,
+    orte_errmgr_orted_proc_aborted,
+    orte_errmgr_orted_incomplete_start,
+    orte_errmgr_orted_error_detected,
+    orte_errmgr_orted_register_job,
+    orte_errmgr_orted_abort,
+    orte_errmgr_orted_abort_procs_request
+};
+
+
+/*
+ * Whether or not we allowed this component to be selected
+ */
+static bool initialized = false;
+
+/* local globals */
+orte_errmgr_orted_globals_t orte_errmgr_orted_globals;
+
+
+/*
+ * Open the component: register and read the "debug" parameter
+ */
+int orte_errmgr_orted_open(void)
+{
+    int param_index;
+    int value;
+
+    param_index = mca_base_param_register_int("errmgr", "orted", "debug", NULL, 0);
+    mca_base_param_lookup_int(param_index, &value);
+
+    /* any non-zero setting turns debugging on */
+    orte_errmgr_orted_globals.debug = (0 != value) ? true : false;
+
+    /* opening always reports success */
+    return ORTE_SUCCESS;
+}
+
+/*
+ * Close the component
+ */
+int orte_errmgr_orted_close(void)
+{
+    return ORTE_SUCCESS;
+}
+
+/*
+ * Component init: hand back the orted module, or NULL to decline selection
+ */
+orte_errmgr_base_module_t*
+orte_errmgr_orted_component_init(bool *allow_multi_user_threads, bool *have_hidden_threads,
+                                 int *priority)
+{
+    if (orte_errmgr_orted_globals.debug) {
+        opal_output(0, "errmgr_orted_init called");
+    }
+
+    /* If we are not a daemon, then this component is not for us! */
+    if (!orte_process_info.daemon) {
+        /* don't take me! */
+        return NULL;
+    }
+
+    /* Report an arbitrary, positive priority (it's only relevant compared
+     * to other components) and no thread support: no part of OpenRTE
+     * allows or has threads */
+    *priority = 10;
+    *allow_multi_user_threads = false;
+    *have_hidden_threads = false;
+
+    /* define the HNP we should be talking to - for now,
+     * just use the NS replica
+     */
+    orte_errmgr_orted_globals.replica = orte_process_info.ns_replica;
+
+    initialized = true;
+    return &orte_errmgr_orted;
+}
+
+/*
+ * finalize routine
+ */
+int orte_errmgr_orted_finalize(void)
+{
+    if (orte_errmgr_orted_globals.debug) {
+        opal_output(0, "[%lu,%lu,%lu] errmgr_orted_finalize called",
+                    ORTE_NAME_ARGS(orte_process_info.my_name));
+    }
+
+    /* nothing else to tear down - just clear the selection flag */
+    initialized = false;
+
+    return ORTE_SUCCESS;
+}
diff --git a/orte/mca/errmgr/proxy/Makefile.am b/orte/mca/errmgr/proxy/Makefile.am
new file mode 100644
index 0000000000..a426e8b426
--- /dev/null
+++ b/orte/mca/errmgr/proxy/Makefile.am
@@ -0,0 +1,46 @@
+#
+# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+#                         University Research and Technology
+#                         Corporation. All rights reserved.
+# Copyright (c) 2004-2005 The University of Tennessee and The University
+#                         of Tennessee Research Foundation. All rights
+#                         reserved.
+# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+#                         University of Stuttgart. All rights reserved.
+# Copyright (c) 2004-2005 The Regents of the University of California.
+#                         All rights reserved.
+# $COPYRIGHT$
+#
+# Additional copyrights may follow
+#
+# $HEADER$
+#
+
+sources = \
+    errmgr_proxy.h \
+    errmgr_proxy_component.c \
+    errmgr_proxy.c
+
+# Make the output library in this directory, and name it either
+# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
+# (for static builds).
+
+if OMPI_BUILD_errmgr_proxy_DSO
+component_noinst =
+component_install = mca_errmgr_proxy.la
+else
+component_noinst = libmca_errmgr_proxy.la
+component_install =
+endif
+
+mcacomponentdir = $(libdir)/openmpi
+mcacomponent_LTLIBRARIES = $(component_install)
+mca_errmgr_proxy_la_SOURCES = $(sources)
+mca_errmgr_proxy_la_LDFLAGS = -module -avoid-version
+mca_errmgr_proxy_la_LIBADD = \
+    $(top_ompi_builddir)/orte/liborte.la \
+    $(top_ompi_builddir)/opal/libopal.la
+
+noinst_LTLIBRARIES = $(component_noinst)
+libmca_errmgr_proxy_la_SOURCES =$(sources)
+libmca_errmgr_proxy_la_LDFLAGS = -module -avoid-version
diff --git a/orte/mca/errmgr/proxy/configure.params b/orte/mca/errmgr/proxy/configure.params
new file mode 100644
index 0000000000..9a1dd9a6a9
--- /dev/null
+++ b/orte/mca/errmgr/proxy/configure.params
@@ -0,0 +1,23 @@
+# -*- shell-script -*-
+#
+# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+#                         University Research and Technology
+#                         Corporation. All rights reserved.
+# Copyright (c) 2004-2005 The University of Tennessee and The University
+#                         of Tennessee Research Foundation. All rights
+#                         reserved.
+# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+#                         University of Stuttgart. All rights reserved.
+# Copyright (c) 2004-2005 The Regents of the University of California.
+#                         All rights reserved.
+# $COPYRIGHT$
+#
+# Additional copyrights may follow
+#
+# $HEADER$
+#
+
+# Specific to this module
+
+PARAM_INIT_FILE=errmgr_proxy_component.c
+PARAM_CONFIG_FILES="Makefile"
diff --git a/orte/mca/errmgr/proxy/errmgr_proxy.c b/orte/mca/errmgr/proxy/errmgr_proxy.c
new file mode 100644
index 0000000000..7d6c0e6e06
--- /dev/null
+++ b/orte/mca/errmgr/proxy/errmgr_proxy.c
@@ -0,0 +1,187 @@
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation. All rights reserved.
+ * Copyright (c) 2004-2005 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation. All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+
+#include "orte_config.h"
+#include "orte/orte_constants.h"
+
+#include "opal/util/output.h"
+
+#include "orte/runtime/runtime.h"
+#include "orte/mca/ns/ns_types.h"
+#include "orte/mca/gpr/gpr_types.h"
+#include "orte/mca/rml/rml.h"
+
+#include "orte/mca/errmgr/base/errmgr_private.h"
+#include "orte/mca/errmgr/proxy/errmgr_proxy.h"
+
+/*
+ * This function gets called when the SMR updates a process state to
+ * indicate that it aborted. The proxy component is only active on non-HNP
+ * processes, so it will NEVER be called here: it reports ORTE_ERR_NOT_AVAILABLE
+ */
+int orte_errmgr_proxy_proc_aborted(orte_gpr_notify_message_t *msg)
+{
+    return ORTE_ERR_NOT_AVAILABLE;
+}
+
+/*
+ * This function gets called when the SMR updates a process state to
+ * indicate that it failed to start.
Since the proxy component is only active on
+ * non-HNP processes, this function will NEVER be called
+ */
+int orte_errmgr_proxy_incomplete_start(orte_gpr_notify_message_t *msg)
+{
+    return ORTE_ERR_NOT_AVAILABLE;
+}
+
+/*
+ * This function gets called when a process detects an internal error.
+ * Various non-HNP/non-orted errmgr components will deal with this in various
+ * ways - for now, we simply abort and provide the error_code as our
+ * exit status
+ */
+void orte_errmgr_proxy_error_detected(int error_code, char *fmt, ...)
+{
+    va_list arglist;
+
+    /* If there was a message, expand it and print the result via a literal
+     * "%s" ('%' in the text is never re-parsed); on failure print fmt raw */
+    va_start(arglist, fmt);
+    if( NULL != fmt ) {
+        char* buffer = NULL;
+        if( 0 > vasprintf( &buffer, fmt, arglist ) ) { buffer = NULL; }
+        opal_output( 0, "%s", (NULL != buffer) ? buffer : fmt );
+        free( buffer );
+    }
+    va_end(arglist);
+
+    orte_abort(error_code, true);
+}
+
+/*
+ * This function gets called when a process desperately needs to just die.
+ * Nothing can be done by definition here - this function ONLY gets
+ * called as an absolute last resort.
+ */
+void orte_errmgr_proxy_abort(void)
+{
+    /* abnormal exit */
+    orte_abort(-1, true);
+}
+
+/*
+ * Alternatively, some systems (e.g., OpenMPI) need to tell us to kill
+ * some other subset of processes along with us. Send that info to the
+ * HNP so it can kill them.
+ *
+ * NOTE: this function assumes that the underlying ORTE infrastructure is
+ * still operational. Use of this function should therefore be restricted
+ * to cases where the problem is in a higher layer (e.g., MPI) as the
+ * process is likely to "hang" if an ORTE problem has been encountered.
+ */
+int orte_errmgr_proxy_abort_procs_request(orte_process_name_t *procs, orte_std_cntr_t nprocs)
+{
+    orte_buffer_t* cmd;
+    orte_buffer_t* answer;
+    orte_errmgr_cmd_flag_t command;
+    orte_std_cntr_t count;
+    int rc;
+
+    /* protect us against error - a NULL proc array is rejected up front */
+    if (NULL == procs) {
+        return ORTE_ERR_BAD_PARAM;
+    }
+
+    command = ORTE_ERRMGR_ABORT_PROCS_REQUEST_CMD;
+
+    cmd = OBJ_NEW(orte_buffer_t);
+    if (cmd == NULL) {
+        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
+        return ORTE_ERR_OUT_OF_RESOURCE;
+    }
+
+    /* pack the command (from here on, every failure path releases cmd) */
+    if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &command, 1, ORTE_ERRMGR_CMD))) {
+        ORTE_ERROR_LOG(rc);
+        OBJ_RELEASE(cmd);
+        return rc;
+    }
+
+    /* pack the number of procs we are requesting be aborted */
+    if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &nprocs, 1, ORTE_STD_CNTR))) {
+        ORTE_ERROR_LOG(rc);
+        OBJ_RELEASE(cmd);
+        return rc;
+    }
+
+    /* pack the array of proc names */
+    if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, procs, nprocs, ORTE_NAME))) {
+        ORTE_ERROR_LOG(rc);
+        OBJ_RELEASE(cmd);
+        return rc;
+    }
+
+    /* send the request. NOTE(review): it travels on ORTE_RML_TAG_RDS - confirm the HNP's errmgr receive listens on that tag */
+    if (0 > orte_rml.send_buffer(orte_errmgr_proxy_globals.replica, cmd, ORTE_RML_TAG_RDS, 0)) {
+        ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
+        OBJ_RELEASE(cmd);
+        return ORTE_ERR_COMM_FAILURE;
+    }
+    OBJ_RELEASE(cmd);
+
+    /* setup a buffer for the answer (cmd has already been released) */
+    answer = OBJ_NEW(orte_buffer_t);
+    if(answer == NULL) {
+        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
+        return ORTE_ERR_OUT_OF_RESOURCE;
+    }
+
+    /* enter a blocking receive until we hear back */
+    if (0 > orte_rml.recv_buffer(orte_errmgr_proxy_globals.replica, answer, ORTE_RML_TAG_RDS)) {
+        ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
+        OBJ_RELEASE(answer);
+        return ORTE_ERR_COMM_FAILURE;
+    }
+
+    count = 1;
+    if (ORTE_SUCCESS != (rc = orte_dss.unpack(answer, &command, &count, ORTE_ERRMGR_CMD))) {
+        ORTE_ERROR_LOG(rc);
+        OBJ_RELEASE(answer);
+        return rc;
+    }
+
+    /* check that this is the right command - the answer must carry the flag we sent */
+    if (ORTE_ERRMGR_ABORT_PROCS_REQUEST_CMD != command) {
+        ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
+        OBJ_RELEASE(answer);
+        return ORTE_ERR_COMM_FAILURE;
+    }
+
+    /* clean up and leave */
+    OBJ_RELEASE(answer);
+    return ORTE_SUCCESS;
+}
+
+/*
+ * It is imperative that ONLY an HNP perform this registration! The proxy registers nothing and reports ORTE_SUCCESS.
+ */
+int orte_errmgr_proxy_register_job(orte_jobid_t job)
+{
+    return ORTE_SUCCESS;
+}
diff --git a/orte/mca/errmgr/proxy/errmgr_proxy.h b/orte/mca/errmgr/proxy/errmgr_proxy.h
new file mode 100644
index 0000000000..bd3afac7c9
--- /dev/null
+++ b/orte/mca/errmgr/proxy/errmgr_proxy.h
@@ -0,0 +1,81 @@
+/* -*- C -*-
+ *
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation. All rights reserved.
+ * Copyright (c) 2004-2005 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation. All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ *                         University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ *                         All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ *
+ */
+#ifndef ORTE_ERRMGR_PROXY_H
+#define ORTE_ERRMGR_PROXY_H
+
+
+#include "orte_config.h"
+#include "orte/orte_types.h"
+
+#include "orte/mca/ns/ns_types.h"
+#include "orte/mca/gpr/gpr_types.h"
+
+#include "orte/mca/errmgr/errmgr.h"
+
+#if defined(c_plusplus) || defined(__cplusplus)
+extern "C" {
+#endif
+
+/*
+ * Component open / close (installed in the mca_errmgr_proxy_component descriptor)
+ */
+int orte_errmgr_proxy_open(void);
+int orte_errmgr_proxy_close(void);
+
+
+/*
+ * Startup / Shutdown: component_init returns the module, or NULL to decline
+ */
+orte_errmgr_base_module_t*
+orte_errmgr_proxy_component_init(bool *allow_multi_user_threads, bool *have_hidden_threads, int *priority);
+
+int orte_errmgr_proxy_finalize(void);
+
+/*
+ * globals used within the component: debug flag, and the peer ("replica") that abort requests are exchanged with
+ */
+typedef struct {
+    int debug;
+    orte_process_name_t *replica;
+} orte_errmgr_proxy_globals_t;
+
+
+extern orte_errmgr_proxy_globals_t orte_errmgr_proxy_globals;
+
+/*
+ * Module API functions (the entries of this component's errmgr module table)
+ */
+int orte_errmgr_proxy_proc_aborted(orte_gpr_notify_message_t *msg);
+
+int orte_errmgr_proxy_incomplete_start(orte_gpr_notify_message_t *msg);
+
+void orte_errmgr_proxy_error_detected(int error_code, char *fmt, ...);
+
+void orte_errmgr_proxy_abort(void);
+
+int orte_errmgr_proxy_register_job(orte_jobid_t job);
+
+int orte_errmgr_proxy_abort_procs_request(orte_process_name_t *procs, orte_std_cntr_t nprocs);
+
+#if defined(c_plusplus) || defined(__cplusplus)
+}
+#endif
+
+#endif
diff --git a/orte/mca/errmgr/proxy/errmgr_proxy_component.c b/orte/mca/errmgr/proxy/errmgr_proxy_component.c
new file mode 100644
index 0000000000..ed1f339201
--- /dev/null
+++ b/orte/mca/errmgr/proxy/errmgr_proxy_component.c
@@ -0,0 +1,163 @@
+/* -*- C -*-
+ *
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ *                         University Research and Technology
+ *                         Corporation. All rights reserved.
+ * Copyright (c) 2004-2005 The University of Tennessee and The University
+ *                         of Tennessee Research Foundation. All rights
+ *                         reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +/** @file: + * + * The Open MPI General Purpose Registry - Proxy component + * + */ + +/* + * includes + */ +#include "orte_config.h" + +#include "orte/orte_constants.h" +#include "orte/orte_types.h" + +#include "opal/util/output.h" +#include "opal/mca/mca.h" +#include "opal/mca/base/base.h" +#include "opal/mca/base/mca_base_param.h" + +#include "orte/util/proc_info.h" +#include "orte/mca/ns/ns_types.h" + +#include "orte/mca/errmgr/errmgr.h" +#include "orte/mca/errmgr/base/errmgr_private.h" + +#include "errmgr_proxy.h" + + +/* + * Struct of function pointers that need to be initialized + */ +mca_errmgr_base_component_t mca_errmgr_proxy_component = { + { + ORTE_ERRMGR_BASE_VERSION_1_3_0, + + "proxy", /* MCA module name */ + ORTE_MAJOR_VERSION, /* MCA module major version */ + ORTE_MINOR_VERSION, /* MCA module minor version */ + ORTE_RELEASE_VERSION, /* MCA module release version */ + orte_errmgr_proxy_open, /* module open */ + orte_errmgr_proxy_close /* module close */ + }, + { + false /* checkpoint / restart */ + }, + orte_errmgr_proxy_component_init, /* module init */ + orte_errmgr_proxy_finalize /* module shutdown */ +}; + +/* + * setup the function pointers for the module + */ +static orte_errmgr_base_module_t orte_errmgr_proxy = { + orte_errmgr_base_log, + orte_errmgr_proxy_proc_aborted, + orte_errmgr_proxy_incomplete_start, + orte_errmgr_proxy_error_detected, + orte_errmgr_proxy_register_job, + orte_errmgr_proxy_abort, + orte_errmgr_proxy_abort_procs_request +}; + + +/* + * Whether or not we allowed this component to be selected + */ +static bool initialized = false; + +/* local globals */ +orte_errmgr_proxy_globals_t orte_errmgr_proxy_globals; + +/* + * Open the 
component: register the ("errmgr", "proxy", "debug") integer MCA parameter (default 0) and mirror it into orte_errmgr_proxy_globals.debug
+ */
+int orte_errmgr_proxy_open(void)
+{
+    int id, tmp = 0; /* preset to the registered default: the lookup result is not checked, so tmp must never be read indeterminate */
+
+    id = mca_base_param_register_int("errmgr", "proxy", "debug", NULL, 0);
+    mca_base_param_lookup_int(id, &tmp);
+    if (tmp) {
+        orte_errmgr_proxy_globals.debug = true;
+    } else {
+        orte_errmgr_proxy_globals.debug = false;
+    }
+
+    return ORTE_SUCCESS;
+}
+
+/*
+ * Close the component - open acquires nothing, so there is nothing to undo; always returns ORTE_SUCCESS
+ */
+int orte_errmgr_proxy_close(void)
+{
+    return ORTE_SUCCESS;
+}
+/* Offer this component for selection: NULL on the HNP (seed) and on daemons, otherwise the proxy module with *priority = 10 and both thread flags false */
+orte_errmgr_base_module_t*
+orte_errmgr_proxy_component_init(bool *allow_multi_user_threads, bool *have_hidden_threads,
+                                 int *priority)
+{
+    if (orte_errmgr_proxy_globals.debug) {
+        opal_output(0, "errmgr_proxy_init called");
+    }
+
+    /* If we are an HNP or an orted, then don't pick us! */
+    if (orte_process_info.seed || orte_process_info.daemon) {
+        /* don't take me! (the output parameters are left untouched on this path) */
+        return NULL;
+    }
+
+    /* Return a module (choose an arbitrary, positive priority --
+       it's only relevant compared to other components). */
+
+    *priority = 10;
+
+    /* no part of OpenRTE allows or has threads */
+
+    *allow_multi_user_threads = false;
+    *have_hidden_threads = false;
+
+    /* define the replica for us to use - for now, just point
+     * to the name service replica (pointer is shared, not copied)
+     */
+    orte_errmgr_proxy_globals.replica = orte_process_info.ns_replica;
+
+    initialized = true;
+    return &orte_errmgr_proxy;
+}
+
+/*
+ * finalize routine - clears the 'initialized' flag; the replica pointer is left as component_init set it; always returns ORTE_SUCCESS
+ */
+int orte_errmgr_proxy_finalize(void)
+{
+    if (orte_errmgr_proxy_globals.debug) {
+        opal_output(0, "[%lu,%lu,%lu] errmgr_proxy_finalize called",
+                    ORTE_NAME_ARGS(orte_process_info.my_name));
+    }
+
+    initialized = false;
+
+    /* All done */
+    return ORTE_SUCCESS;
+}
diff --git a/orte/mca/gpr/replica/functional_layer/gpr_replica_fn.h b/orte/mca/gpr/replica/functional_layer/gpr_replica_fn.h
index 16e78a1290..7581ef62f1 100644
--- a/orte/mca/gpr/replica/functional_layer/gpr_replica_fn.h
+++ b/orte/mca/gpr/replica/functional_layer/gpr_replica_fn.h
@@ -278,6 +278,7 @@ int orte_gpr_replica_purge_subscriptions(orte_process_name_t *proc);
 int
orte_gpr_replica_store_value_in_msg(orte_gpr_replica_requestor_t *req, orte_gpr_notify_message_t *msg, + char *sub_name, orte_std_cntr_t cnt, orte_gpr_value_t **values); diff --git a/orte/mca/gpr/replica/functional_layer/gpr_replica_messaging_fn.c b/orte/mca/gpr/replica/functional_layer/gpr_replica_messaging_fn.c index 4fa552e90f..a8d139da4c 100644 --- a/orte/mca/gpr/replica/functional_layer/gpr_replica_messaging_fn.c +++ b/orte/mca/gpr/replica/functional_layer/gpr_replica_messaging_fn.c @@ -213,7 +213,7 @@ int orte_gpr_replica_register_callback(orte_gpr_replica_subscription_t *sub, * subscription id, combining data where the id's match */ if (ORTE_SUCCESS != (rc = orte_gpr_replica_store_value_in_msg(reqs[i], - cb->message, cnt, values))) { + cb->message, sub->name, cnt, values))) { ORTE_ERROR_LOG(rc); goto CLEANUP; } @@ -436,6 +436,7 @@ int orte_gpr_replica_define_callback(orte_gpr_notify_msg_type_t msg_type, int orte_gpr_replica_store_value_in_msg(orte_gpr_replica_requestor_t *req, orte_gpr_notify_message_t *msg, + char *sub_name, orte_std_cntr_t cnt, orte_gpr_value_t **values) { @@ -482,6 +483,10 @@ int orte_gpr_replica_store_value_in_msg(orte_gpr_replica_requestor_t *req, ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); return ORTE_ERR_OUT_OF_RESOURCE; } + /* set the name of the subscription, if provided */ + if (NULL != sub_name) { + dptr->target = strdup(sub_name); + } dptr->id = req->idtag; if (0 > orte_pointer_array_add(&index, msg->data, dptr)) { ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); @@ -526,7 +531,7 @@ static int orte_gpr_replica_store_value_in_trigger_msg(orte_gpr_replica_subscrip if (NULL != data[i]) { k++; if ((NULL == data[i]->target && NULL == sub) || - (NULL != data[i]->target && + (NULL != data[i]->target && NULL != sub->name && 0 == strcmp(data[i]->target, sub->name))) { /* going to the same place */ for (j=0; j < cnt; j++) { if (0 > orte_pointer_array_add(&index, data[i]->values, values[j])) { @@ -557,7 +562,7 @@ static int 
orte_gpr_replica_store_value_in_trigger_msg(orte_gpr_replica_subscrip ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); return ORTE_ERR_OUT_OF_RESOURCE; } - if (NULL != sub) { + if (NULL != sub && NULL != sub->name) { dptr->target = strdup(sub->name); } if (0 > orte_pointer_array_add(&index, msg->data, dptr)) { diff --git a/orte/mca/ns/base/data_type_support/ns_data_type_compare_fns.c b/orte/mca/ns/base/data_type_support/ns_data_type_compare_fns.c index abbaf27d30..65cceb317d 100755 --- a/orte/mca/ns/base/data_type_support/ns_data_type_compare_fns.c +++ b/orte/mca/ns/base/data_type_support/ns_data_type_compare_fns.c @@ -42,28 +42,79 @@ int orte_ns_base_compare_name(orte_process_name_t *value1, return ORTE_VALUE1_GREATER; } - /* for this generic compare, go through the progression */ - if (value1->cellid < value2->cellid) { - return ORTE_VALUE2_GREATER; - } else if (value1->cellid > value2->cellid) { - return ORTE_VALUE1_GREATER; + /** we have to take care of the special case where one of the + * values is ORTE_NAME_WILDCARD. If any of the fields are wildcard, + * then we want to just ignore that one field. However, in the case + * of ORTE_NAME_WILDCARD (where ALL of the fields are wildcard), this + * would automatically result in ORTE_EQUAL for any name in the other + * value - a totally useless result. + * + * Instead, what we want to know in this case is if the value actually + * *is* ORTE_NAME_WILDCARD. 
So, we need to detect if one of the values + * is ORTE_NAME_WILDCARD, and then specifically check the other one + * to see if it matches + */ + if (value2->cellid == ORTE_CELLID_WILDCARD && + value2->jobid == ORTE_JOBID_WILDCARD && + value2->vpid == ORTE_VPID_WILDCARD) { + if (value1->cellid == ORTE_CELLID_WILDCARD && + value1->jobid == ORTE_JOBID_WILDCARD && + value1->vpid == ORTE_VPID_WILDCARD) { + return ORTE_EQUAL; + } else { + return ORTE_VALUE1_GREATER; + } + } else if (value1->cellid == ORTE_CELLID_WILDCARD && + value1->jobid == ORTE_JOBID_WILDCARD && + value1->vpid == ORTE_VPID_WILDCARD) { + if (value2->cellid == ORTE_CELLID_WILDCARD && + value2->jobid == ORTE_JOBID_WILDCARD && + value2->vpid == ORTE_VPID_WILDCARD) { + return ORTE_EQUAL; + } else { + return ORTE_VALUE2_GREATER; + } } - - /* get here if jobid's are equal - now check process group */ - if (value1->jobid < value2->jobid) { - return ORTE_VALUE2_GREATER; - } else if (value1->jobid > value2->jobid) { - return ORTE_VALUE1_GREATER; + + /** now that the special cases are done, go through the progression */ + + /** check the cellids - if one of them is WILDCARD, then ignore + * this field since anything is okay + */ + if (value1->cellid != ORTE_CELLID_WILDCARD && + value2->cellid != ORTE_CELLID_WILDCARD) { + if (value1->cellid < value2->cellid) { + return ORTE_VALUE2_GREATER; + } else if (value1->cellid > value2->cellid) { + return ORTE_VALUE1_GREATER; + } } - - /* get here if cellid's and jobid's are equal - now check vpid */ - if (value1->vpid < value2->vpid) { - return ORTE_VALUE2_GREATER; - } else if (value1->vpid > value2->vpid) { - return ORTE_VALUE1_GREATER; + + /** check the jobids - if one of them is WILDCARD, then ignore + * this field since anything is okay + */ + if (value1->jobid != ORTE_JOBID_WILDCARD && + value2->jobid != ORTE_JOBID_WILDCARD) { + if (value1->jobid < value2->jobid) { + return ORTE_VALUE2_GREATER; + } else if (value1->jobid > value2->jobid) { + return 
ORTE_VALUE1_GREATER; + } } - - /* only way to get here is if all fields are equal */ + + /** check the vpids - if one of them is WILDCARD, then ignore + * this field since anything is okay + */ + if (value1->vpid != ORTE_VPID_WILDCARD && + value2->vpid != ORTE_VPID_WILDCARD) { + if (value1->vpid < value2->vpid) { + return ORTE_VALUE2_GREATER; + } else if (value1->vpid > value2->vpid) { + return ORTE_VALUE1_GREATER; + } + } + + /** only way to get here is if all fields are equal or WILDCARD */ return ORTE_EQUAL; } @@ -72,10 +123,14 @@ int orte_ns_base_compare_vpid(orte_vpid_t *value1, orte_data_type_t type) { + /** if either value is WILDCARD, then return equal */ + if (*value1 == ORTE_VPID_WILDCARD || + *value2 == ORTE_VPID_WILDCARD) return ORTE_EQUAL; + if (*value1 > *value2) return ORTE_VALUE1_GREATER; - + if (*value2 > *value1) return ORTE_VALUE2_GREATER; - + return ORTE_EQUAL; } @@ -83,10 +138,14 @@ int orte_ns_base_compare_jobid(orte_jobid_t *value1, orte_jobid_t *value2, orte_data_type_t type) { + /** if either value is WILDCARD, then return equal */ + if (*value1 == ORTE_JOBID_WILDCARD || + *value2 == ORTE_JOBID_WILDCARD) return ORTE_EQUAL; + if (*value1 > *value2) return ORTE_VALUE1_GREATER; - + if (*value2 > *value1) return ORTE_VALUE2_GREATER; - + return ORTE_EQUAL; } @@ -94,9 +153,13 @@ int orte_ns_base_compare_cellid(orte_cellid_t *value1, orte_cellid_t *value2, orte_data_type_t type) { + /** if either value is WILDCARD, then return equal */ + if (*value1 == ORTE_CELLID_WILDCARD || + *value2 == ORTE_CELLID_WILDCARD) return ORTE_EQUAL; + if (*value1 > *value2) return ORTE_VALUE1_GREATER; - + if (*value2 > *value1) return ORTE_VALUE2_GREATER; - + return ORTE_EQUAL; } diff --git a/orte/mca/ns/base/ns_base_open.c b/orte/mca/ns/base/ns_base_open.c index 2fc4b32add..e6c49b0975 100644 --- a/orte/mca/ns/base/ns_base_open.c +++ b/orte/mca/ns/base/ns_base_open.c @@ -100,13 +100,13 @@ mca_ns_base_component_t mca_ns_base_selected_component; /* constructor - used to 
initialize namelist instance */ -static void orte_name_services_namelist_construct(orte_name_services_namelist_t* list) +static void orte_namelist_construct(orte_namelist_t* list) { list->name = NULL; } /* destructor - used to free any resources held by instance */ -static void orte_name_services_namelist_destructor(orte_name_services_namelist_t* list) +static void orte_namelist_destructor(orte_namelist_t* list) { if (NULL != list->name) { free(list->name); @@ -115,10 +115,10 @@ static void orte_name_services_namelist_destructor(orte_name_services_namelist_t /* define instance of opal_class_t */ OBJ_CLASS_INSTANCE( - orte_name_services_namelist_t, /* type name */ + orte_namelist_t, /* type name */ opal_list_item_t, /* parent "class" name */ - orte_name_services_namelist_construct, /* constructor */ - orte_name_services_namelist_destructor); /* destructor */ + orte_namelist_construct, /* constructor */ + orte_namelist_destructor); /* destructor */ diff --git a/orte/mca/ns/ns_types.h b/orte/mca/ns/ns_types.h index 22b6fd6ac8..7cca8d9435 100644 --- a/orte/mca/ns/ns_types.h +++ b/orte/mca/ns/ns_types.h @@ -91,6 +91,20 @@ typedef struct orte_process_name_t orte_process_name_t; #define ORTE_JOBID_MAX ORTE_STD_CNTR_MAX #define ORTE_VPID_MAX ORTE_STD_CNTR_MAX +/* + * define invalid values + */ +#define ORTE_CELLID_INVALID -999 +#define ORTE_JOBID_INVALID -999 +#define ORTE_VPID_INVALID -999 + +/* + * define wildcard values + */ +#define ORTE_CELLID_WILDCARD -1 +#define ORTE_JOBID_WILDCARD -1 +#define ORTE_VPID_WILDCARD -1 + ORTE_DECLSPEC extern orte_process_name_t orte_name_all; #define ORTE_NAME_ALL &orte_name_all @@ -117,13 +131,13 @@ ORTE_DECLSPEC extern orte_process_name_t orte_name_all; /** List of names for general use */ -struct orte_name_services_namelist_t { +struct orte_namelist_t { opal_list_item_t item; /**< Allows this item to be placed on a list */ orte_process_name_t *name; /**< Name of a process */ }; -typedef struct orte_name_services_namelist_t 
orte_name_services_namelist_t; +typedef struct orte_namelist_t orte_namelist_t; -ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_name_services_namelist_t); +ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_namelist_t); #if defined(c_plusplus) || defined(__cplusplus) } diff --git a/orte/mca/ns/proxy/src/ns_proxy.c b/orte/mca/ns/proxy/src/ns_proxy.c index 04b945fa33..d365b76934 100644 --- a/orte/mca/ns/proxy/src/ns_proxy.c +++ b/orte/mca/ns/proxy/src/ns_proxy.c @@ -79,7 +79,7 @@ int orte_ns_proxy_create_cellid(orte_cellid_t *cellid, char *site, char *resourc return rc; } - if (0 > orte_rml.send_buffer(orte_ns_proxy.my_replica, cmd, MCA_OOB_TAG_NS, 0)) { + if (0 > orte_rml.send_buffer(orte_ns_proxy.my_replica, cmd, ORTE_RML_TAG_NS, 0)) { ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); OBJ_RELEASE(cmd); return ORTE_ERR_COMM_FAILURE; @@ -202,7 +202,7 @@ int orte_ns_proxy_get_cell_info(orte_cellid_t cellid, return rc; } - if (0 > orte_rml.send_buffer(orte_ns_proxy.my_replica, cmd, MCA_OOB_TAG_NS, 0)) { + if (0 > orte_rml.send_buffer(orte_ns_proxy.my_replica, cmd, ORTE_RML_TAG_NS, 0)) { ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); OBJ_RELEASE(cmd); OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); @@ -803,7 +803,7 @@ int orte_ns_proxy_create_my_name(void) return rc; } - if (0 > orte_rml.send_buffer(orte_ns_proxy.my_replica, cmd, MCA_OOB_TAG_NS, 0)) { + if (0 > orte_rml.send_buffer(orte_ns_proxy.my_replica, cmd, ORTE_RML_TAG_NS, 0)) { ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); OBJ_RELEASE(cmd); return ORTE_ERR_COMM_FAILURE; @@ -839,7 +839,7 @@ int orte_ns_proxy_dump_cells(void) return rc; } - if (0 > orte_rml.send_buffer(orte_ns_proxy.my_replica, &cmd, MCA_OOB_TAG_NS, 0)) { + if (0 > orte_rml.send_buffer(orte_ns_proxy.my_replica, &cmd, ORTE_RML_TAG_NS, 0)) { ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); OBJ_DESTRUCT(&cmd); OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); @@ -898,7 +898,7 @@ int orte_ns_proxy_dump_jobs(void) return rc; } - if (0 > orte_rml.send_buffer(orte_ns_proxy.my_replica, &cmd, MCA_OOB_TAG_NS, 0)) { + if (0 
> orte_rml.send_buffer(orte_ns_proxy.my_replica, &cmd, ORTE_RML_TAG_NS, 0)) { ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); OBJ_DESTRUCT(&cmd); OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); @@ -947,7 +947,7 @@ int orte_ns_proxy_dump_tags(void) return rc; } - if (0 > orte_rml.send_buffer(orte_ns_proxy.my_replica, &cmd, MCA_OOB_TAG_NS, 0)) { + if (0 > orte_rml.send_buffer(orte_ns_proxy.my_replica, &cmd, ORTE_RML_TAG_NS, 0)) { ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); OBJ_DESTRUCT(&cmd); OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); @@ -1008,7 +1008,7 @@ int orte_ns_proxy_dump_datatypes(void) return rc; } - if (0 > orte_rml.send_buffer(orte_ns_proxy.my_replica, &cmd, MCA_OOB_TAG_NS, 0)) { + if (0 > orte_rml.send_buffer(orte_ns_proxy.my_replica, &cmd, ORTE_RML_TAG_NS, 0)) { ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); OBJ_DESTRUCT(&cmd); OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); diff --git a/orte/mca/odls/Makefile.am b/orte/mca/odls/Makefile.am new file mode 100644 index 0000000000..ac590af515 --- /dev/null +++ b/orte/mca/odls/Makefile.am @@ -0,0 +1,42 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# main library setup +noinst_LTLIBRARIES = libmca_odls.la +libmca_odls_la_SOURCES = + +# header setup +nobase_orte_HEADERS = +dist_pkgdata_DATA = + +# local files +headers = odls.h odls_types.h +libmca_odls_la_SOURCES += $(headers) + +# Conditionally install the header files +if WANT_INSTALL_HEADERS +nobase_orte_HEADERS += $(headers) +ortedir = $(includedir)/openmpi/orte/mca/odls +else +ortedir = $(includedir) +endif + +include base/Makefile.am + +distclean-local: + rm -f base/static-components.h diff --git a/orte/mca/odls/base/Makefile.am b/orte/mca/odls/base/Makefile.am new file mode 100644 index 0000000000..9b98af9dc5 --- /dev/null +++ b/orte/mca/odls/base/Makefile.am @@ -0,0 +1,33 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +headers += \ + base/odls_private.h \ + base/base.h + +libmca_odls_la_SOURCES += \ + base/odls_base_close.c \ + base/odls_base_open.c \ + base/odls_base_select.c \ + base/data_type_support/odls_compare_fns.c \ + base/data_type_support/odls_copy_fns.c \ + base/data_type_support/odls_packing_fns.c \ + base/data_type_support/odls_print_fns.c \ + base/data_type_support/odls_release_fns.c \ + base/data_type_support/odls_size_fns.c \ + base/data_type_support/odls_unpacking_fns.c diff --git a/orte/mca/odls/base/base.h b/orte/mca/odls/base/base.h new file mode 100644 index 0000000000..3b1f640780 --- /dev/null +++ b/orte/mca/odls/base/base.h @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+/** @file:
+ * Framework-level (base) declarations for odls: the shared state struct and the open / select / close entry points.
+ */
+
+#ifndef MCA_ODLS_BASE_H
+#define MCA_ODLS_BASE_H
+
+/*
+ * includes
+ */
+#include "orte_config.h"
+
+#include "opal/mca/mca.h"
+#include "opal/class/opal_list.h"
+
+#include "orte/mca/odls/odls.h"
+
+
+#if defined(c_plusplus) || defined(__cplusplus)
+extern "C" {
+#endif
+
+    /**
+     * Struct to hold globals for the odls framework
+     */
+    typedef struct orte_odls_base_t {
+        /* components are available: false when orte_odls_base_open runs in a non-daemon, true once the components were opened */
+        bool components_available;
+        /* component has been selected: checked by orte_odls_base_close before it finalizes selected_component */
+        bool selected;
+        /** List of opened components (filled by mca_base_components_open, emptied by mca_base_components_close) */
+        opal_list_t available_components;
+        /** selected component (held by value; its finalize member is called on close) */
+        orte_odls_base_component_t selected_component;
+    } orte_odls_base_t;
+
+    /**
+     * Global instance of odls-wide framework data (defined in odls_base_open.c)
+     */
+    ORTE_DECLSPEC extern orte_odls_base_t orte_odls_base;
+
+    /*
+     * Global functions for MCA overall collective open and close
+     */
+
+    /**
+     * Open the odls framework (always registers the ORTE_DAEMON_CMD data type; opens components only in daemons)
+     */
+    ORTE_DECLSPEC int orte_odls_base_open(void);
+    /**
+     * Select an odls module (does nothing but clear 'selected' when no components are available)
+     */
+    ORTE_DECLSPEC int orte_odls_base_select(void);
+
+    /**
+     * Close the odls framework: orte_odls_base_close finalizes the selected component and closes every opened one (no definition of orte_odls_base_finalize is visible in this part of the patch)
+     */
+    ORTE_DECLSPEC int orte_odls_base_finalize(void);
+    ORTE_DECLSPEC int orte_odls_base_close(void);
+
+#if defined(c_plusplus) || defined(__cplusplus)
+}
+#endif /* __cplusplus */
+#endif /* MCA_ODLS_BASE_H */
diff --git a/orte/mca/odls/base/data_type_support/odls_compare_fns.c b/orte/mca/odls/base/data_type_support/odls_compare_fns.c
new file mode 100755
index 0000000000..1edce27360
--- /dev/null
+++ b/orte/mca/odls/base/data_type_support/odls_compare_fns.c
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University.
+ * All rights reserved.
+ * Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
+ * All rights reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ * University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ * All rights reserved.
+ * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "orte_config.h" +#include "orte/orte_constants.h" + +#include "orte/mca/odls/base/odls_private.h" + +/* ORTE_DAEMON_CMD */ +int orte_odls_compare_daemon_cmd(orte_daemon_cmd_flag_t *value1, orte_daemon_cmd_flag_t *value2, orte_data_type_t type) +{ + if (*value1 > *value2) return ORTE_VALUE1_GREATER; + + if (*value2 > *value1) return ORTE_VALUE2_GREATER; + + return ORTE_EQUAL; +} + diff --git a/orte/mca/odls/base/data_type_support/odls_copy_fns.c b/orte/mca/odls/base/data_type_support/odls_copy_fns.c new file mode 100755 index 0000000000..030bebfba9 --- /dev/null +++ b/orte/mca/odls/base/data_type_support/odls_copy_fns.c @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University. + * All rights reserved. + * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. + * All rights reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "orte_config.h"
+#include "orte/orte_constants.h"
+
+#include "orte/mca/errmgr/errmgr.h"
+
+#include "orte/mca/odls/base/odls_private.h"
+/* ORTE_DAEMON_CMD - duplicate *src into freshly malloc'ed storage handed back through *dest; this function never frees it */
+int orte_odls_copy_daemon_cmd(orte_daemon_cmd_flag_t **dest, orte_daemon_cmd_flag_t *src, orte_data_type_t type)
+{
+    orte_daemon_cmd_flag_t *copy;
+
+    /* exactly one flag is duplicated; 'type' is not consulted */
+    copy = (orte_daemon_cmd_flag_t*)malloc(sizeof(orte_daemon_cmd_flag_t));
+    *dest = copy;
+    if (NULL == copy) {
+        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
+        return ORTE_ERR_OUT_OF_RESOURCE;
+    }
+
+    memcpy(copy, src, sizeof(orte_daemon_cmd_flag_t));
+
+    return ORTE_SUCCESS;
+}
+
diff --git a/orte/mca/odls/base/data_type_support/odls_packing_fns.c b/orte/mca/odls/base/data_type_support/odls_packing_fns.c
new file mode 100644
index 0000000000..449a6959a0
--- /dev/null
+++ b/orte/mca/odls/base/data_type_support/odls_packing_fns.c
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ * University Research and Technology
+ * Corporation. All rights reserved.
+ * Copyright (c) 2004-2005 The University of Tennessee and The University
+ * of Tennessee Research Foundation. All rights
+ * reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ * University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ * All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "orte_config.h"
+#include "orte/orte_constants.h"
+
+#include "orte/mca/errmgr/errmgr.h"
+#include "orte/dss/dss_internal.h"
+
+#include "orte/mca/odls/base/odls_private.h"
+
+/*
+ * ORTE_DAEMON_CMD - pack num_vals daemon command flags from src by delegating to the DSS packer for ORTE_DAEMON_CMD_T
+ */
+int orte_odls_pack_daemon_cmd(orte_buffer_t *buffer, void *src, orte_std_cntr_t num_vals,
+                              orte_data_type_t type)
+{
+    int ret;
+
+    /* the incoming 'type' is ignored: the flag is always packed as its real type, ORTE_DAEMON_CMD_T */
+    ret = orte_dss_pack_buffer(buffer, src, num_vals, ORTE_DAEMON_CMD_T);
+    if (ORTE_SUCCESS != ret) {
+        ORTE_ERROR_LOG(ret);
+    }
+    return ret;
+}
+
diff --git a/orte/mca/odls/base/data_type_support/odls_print_fns.c b/orte/mca/odls/base/data_type_support/odls_print_fns.c
new file mode 100755
index 0000000000..ccc4d779d1
--- /dev/null
+++ b/orte/mca/odls/base/data_type_support/odls_print_fns.c
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University.
+ * All rights reserved.
+ * Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
+ * All rights reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ * University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ * All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "orte_config.h"
+#include "orte/orte_constants.h"
+
+#include "orte/mca/errmgr/errmgr.h"
+
+#include "orte/mca/odls/base/odls_private.h"
+
+/*
+ * ORTE_DAEMON_CMD - render *src (or a NULL-pointer notice) into a newly allocated string returned through *output; returns ORTE_SUCCESS or ORTE_ERR_OUT_OF_RESOURCE
+ */
+int orte_odls_print_daemon_cmd(char **output, char *prefix, orte_daemon_cmd_flag_t *src, orte_data_type_t type)
+{
+    /* a NULL prefix is printed as " "; a string literal needs no allocation, so there is nothing to leak */
+    const char *prefx = (NULL == prefix) ? " " : prefix;
+    int rc;
+
+    if (NULL == src) { /* no value to show - print the data type only */
+        rc = asprintf(output, "%sData type: ORTE_DAEMON_CMD\tValue: NULL pointer", prefx);
+    } else {
+        rc = asprintf(output, "%sData type: ORTE_DAEMON_CMD\tValue: %lu", prefx, (unsigned long) *src);
+    }
+    if (0 > rc) { /* asprintf failed and left *output unspecified - do not report success */
+        *output = NULL;
+        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
+        return ORTE_ERR_OUT_OF_RESOURCE;
+    }
+    return ORTE_SUCCESS;
+}
+
diff --git a/orte/mca/odls/base/data_type_support/odls_release_fns.c b/orte/mca/odls/base/data_type_support/odls_release_fns.c
new file mode 100644
index 0000000000..e7e8f7775b
--- /dev/null
+++ b/orte/mca/odls/base/data_type_support/odls_release_fns.c
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University.
+ * All rights reserved.
+ * Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
+ * All rights reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ * University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ * All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "orte_config.h"
+
+#include "orte/dss/dss_types.h"
+
+#include "orte/mca/odls/base/odls_private.h"
+
+/* STANDARD RELEASE FUNCTION - WORKS FOR EVERYTHING NON-STRUCTURED */
+/* frees value->data and leaves the pointer NULL, so the payload cannot be released a second time through this value */
+void orte_odls_std_release(orte_data_value_t *value)
+{
+    void *payload = value->data;
+    value->data = NULL;
+    free(payload);
+}
diff --git a/orte/mca/odls/base/data_type_support/odls_size_fns.c b/orte/mca/odls/base/data_type_support/odls_size_fns.c
new file mode 100755
index 0000000000..bbcc7e4763
--- /dev/null
+++ b/orte/mca/odls/base/data_type_support/odls_size_fns.c
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University.
+ * All rights reserved.
+ * Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
+ * All rights reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ * University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ * All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "orte_config.h"
+#include "orte/orte_constants.h"
+
+#include "orte/mca/odls/base/odls_private.h"
+
+/*
+ * ORTE_DAEMON_CMD size function - reports the size of one command flag whatever src holds; 'type' is not consulted
+ */
+int orte_odls_size_daemon_cmd(size_t *size, orte_daemon_cmd_flag_t *src, orte_data_type_t type)
+{
+    /* sizeof does not evaluate its operand, so src is never dereferenced */
+    *size = sizeof(*src);
+    return ORTE_SUCCESS;
+}
diff --git a/orte/mca/odls/base/data_type_support/odls_unpacking_fns.c b/orte/mca/odls/base/data_type_support/odls_unpacking_fns.c
new file mode 100644
index 0000000000..76242905ca
--- /dev/null
+++ b/orte/mca/odls/base/data_type_support/odls_unpacking_fns.c
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ * University Research and Technology
+ * Corporation. All rights reserved.
+ * Copyright (c) 2004-2005 The University of Tennessee and The University
+ * of Tennessee Research Foundation. All rights
+ * reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ * University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ * All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "orte_config.h"
+#include "orte/orte_constants.h"
+
+#include "orte/mca/errmgr/errmgr.h"
+#include "orte/dss/dss_types.h"
+#include "orte/dss/dss_internal.h"
+
+#include "orte/mca/odls/base/odls_private.h"
+
+/*
+ * ORTE_DAEMON_CMD - unpack daemon command flags from buffer into dest; returns the DSS status code of the unpack (or of the type peek)
+ */
+int orte_odls_unpack_daemon_cmd(orte_buffer_t *buffer, void *dest, orte_std_cntr_t *num_vals,
+                                orte_data_type_t type)
+{
+    int ret;
+    orte_data_type_t remote_type;
+
+    /* if the buffer is fully described, then we can do some magic to handle
+     * the heterogeneous case. if not, then we can only shoot blind - it is the
+     * user's responsibility to ensure we are in a homogeneous environment.
+     */
+    if (ORTE_DSS_BUFFER_FULLY_DESC == buffer->type) {
+        /* see what type was actually packed */
+        if (ORTE_SUCCESS != (ret = orte_dss_peek_type(buffer, &remote_type))) {
+            ORTE_ERROR_LOG(ret);
+            return ret;
+        }
+
+        if (remote_type == ORTE_DAEMON_CMD_T) {
+            /* fast path it if the sizes are the same */
+            /* Turn around and unpack the real type */
+            if (ORTE_SUCCESS != (ret = orte_dss_unpack_buffer(buffer, dest, num_vals, ORTE_DAEMON_CMD_T))) {
+                ORTE_ERROR_LOG(ret);
+            }
+        } else {
+            /* slow path - types are different sizes. NOTE(review): UNPACK_SIZE_MISMATCH is defined outside this file and is only handed ret explicitly; it presumably picks up buffer/dest/num_vals by name - confirm before renaming any local */
+            UNPACK_SIZE_MISMATCH(orte_daemon_cmd_flag_t, remote_type, ret);
+        }
+        return ret;
+    }
+
+    /* if we get here, then this buffer is NOT fully described. just unpack it
+     * using the local size - user gets the pain if it's wrong
+     */
+    if (ORTE_SUCCESS != (ret = orte_dss_unpack_buffer(buffer, dest, num_vals, ORTE_DAEMON_CMD_T))) {
+        ORTE_ERROR_LOG(ret);
+    }
+
+    return ret;
+}
+
diff --git a/orte/mca/odls/base/odls_base_close.c b/orte/mca/odls/base/odls_base_close.c
new file mode 100644
index 0000000000..d9290a6dd2
--- /dev/null
+++ b/orte/mca/odls/base/odls_base_close.c
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ * University Research and Technology
+ * Corporation. All rights reserved.
+ * Copyright (c) 2004-2005 The University of Tennessee and The University
+ * of Tennessee Research Foundation. All rights
+ * reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ * University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ * All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+#include "orte_config.h"
+
+#include /* NOTE(review): the header name is missing here - presumably an angle-bracket include stripped during extraction; restore it from the upstream commit */
+
+#include "orte/orte_constants.h"
+#include "opal/util/trace.h"
+#include "opal/mca/mca.h"
+#include "opal/mca/base/base.h"
+
+#include "orte/mca/odls/odls.h"
+#include "orte/mca/odls/base/base.h"
+#include "orte/mca/odls/base/odls_private.h"
+
+/* Close the odls framework: finalize the selected component (if any) and close every opened component; always returns ORTE_SUCCESS */
+int orte_odls_base_close(void)
+{
+    OPAL_TRACE(5);
+
+    /* if no components are available, then punt (nothing was opened, so there is nothing to finalize or close) */
+    if (!orte_odls_base.components_available) {
+        return ORTE_SUCCESS;
+    }
+
+    /* If we have a selected component and module, then finalize it (the return value of finalize is not examined) */
+
+    if (orte_odls_base.selected) {
+        orte_odls_base.selected_component.finalize();
+    }
+
+    /* Close all available components (only one in this case) */
+
+    mca_base_components_close(orte_odls_globals.output,
+                              &orte_odls_base.available_components, NULL);
+
+    /* All done */
+
+    return ORTE_SUCCESS;
+}
diff --git a/orte/mca/odls/base/odls_base_open.c b/orte/mca/odls/base/odls_base_open.c
new file mode 100644
index
0000000000..0189fc2212
--- /dev/null
+++ b/orte/mca/odls/base/odls_base_open.c
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ * University Research and Technology
+ * Corporation. All rights reserved.
+ * Copyright (c) 2004-2005 The University of Tennessee and The University
+ * of Tennessee Research Foundation. All rights
+ * reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ * University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ * All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+
+#include "orte_config.h"
+#include "orte/orte_constants.h"
+
+#include "opal/mca/mca.h"
+#include "opal/mca/base/base.h"
+#include "opal/mca/base/mca_base_param.h"
+#include "opal/util/output.h"
+#include "opal/util/trace.h"
+
+#include "orte/dss/dss.h"
+#include "orte/util/proc_info.h"
+#include "orte/mca/errmgr/errmgr.h"
+
+#include "orte/mca/odls/base/base.h"
+#include "orte/mca/odls/base/odls_private.h"
+
+
+/*
+ * The following file was created by configure. It contains extern
+ * statements and the definition of an array of pointers to each
+ * component's public mca_base_component_t struct.
+ */
+
+#include "orte/mca/odls/base/static-components.h"
+
+/*
+ * Instantiate globals
+ */
+orte_odls_base_module_t orte_odls;
+
+/*
+ * Framework global variables
+ */
+orte_odls_base_t orte_odls_base;
+orte_odls_globals_t orte_odls_globals;
+
+/**
+ * Open the odls framework: set up the verbose output stream, register the ORTE_DAEMON_CMD data type with the DSS and - in daemons only - find and open the MCA components (all of them, or the one requested via a MCA parameter).
+ * Returns ORTE_SUCCESS, the error code from register_type, or ORTE_ERROR when the components cannot be opened; non-daemons return ORTE_SUCCESS right after the type registration.
+ */
+int orte_odls_base_open(void)
+{
+    int value = 0, rc; /* value preset to the registered default in case the registration fails without storing anything */
+    orte_data_type_t tmp;
+
+    OPAL_TRACE(5);
+
+    /* Debugging / verbose output (the returned parameter index is not needed afterwards) */
+
+    (void) mca_base_param_reg_int_name("odls_base", "verbose",
+                                       "Verbosity level for the odls framework",
+                                       false, false, 0, &value);
+    if (value != 0) {
+        orte_odls_globals.output = opal_output_open(NULL);
+    } else {
+        orte_odls_globals.output = -1;
+    }
+
+    /* register the daemon cmd data type */
+    tmp = ORTE_DAEMON_CMD;
+    if (ORTE_SUCCESS != (rc = orte_dss.register_type(orte_odls_pack_daemon_cmd,
+                                                     orte_odls_unpack_daemon_cmd,
+                                                     (orte_dss_copy_fn_t)orte_odls_copy_daemon_cmd,
+                                                     (orte_dss_compare_fn_t)orte_odls_compare_daemon_cmd,
+                                                     (orte_dss_size_fn_t)orte_odls_size_daemon_cmd,
+                                                     (orte_dss_print_fn_t)orte_odls_print_daemon_cmd,
+                                                     (orte_dss_release_fn_t)orte_odls_std_release,
+                                                     ORTE_DSS_UNSTRUCTURED,
+                                                     "ORTE_DAEMON_CMD", &tmp))) {
+        ORTE_ERROR_LOG(rc);
+        return rc;
+    }
+
+    /* if we are NOT a daemon, then that is ALL we do! We just needed to ensure
+     * that the data type(s) got registered so we can send messages to the daemons
+     */
+    if (!orte_process_info.daemon) {
+        orte_odls_base.components_available = false;
+        return ORTE_SUCCESS;
+    }
+
+    /* Open up all available components */
+
+    if (ORTE_SUCCESS !=
+        mca_base_components_open("odls", orte_odls_globals.output,
+                                 mca_odls_base_static_components,
+                                 &orte_odls_base.available_components, true)) {
+        return ORTE_ERROR;
+    }
+    orte_odls_base.components_available = true;
+
+    /* All done */
+
+    return ORTE_SUCCESS;
+}
diff --git a/orte/mca/odls/base/odls_base_select.c b/orte/mca/odls/base/odls_base_select.c
new file mode 100644
index 0000000000..97d8ecbf4d
--- /dev/null
+++ b/orte/mca/odls/base/odls_base_select.c
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ * University Research and Technology
+ * Corporation. All rights reserved.
+ * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + + +#include "orte_config.h" +#include "orte/orte_constants.h" + +#include "opal/mca/mca.h" +#include "opal/mca/base/base.h" + +#include "orte/mca/errmgr/errmgr.h" + +#include "orte/mca/odls/base/base.h" + + +/** + * Function for selecting one component from all those that are + * available. + */ +int orte_odls_base_select(void) +{ + opal_list_item_t *item; + mca_base_component_list_item_t *cli; + orte_odls_base_component_t *component, *best_component = NULL; + orte_odls_base_module_t *module, *best_module = NULL; + int priority, best_priority = -1; + + /* if no components are available (e.g., we are not in a daemon), then + * there is nothing to do - so just return + */ + if (!orte_odls_base.components_available) { + orte_odls_base.selected = false; + return ORTE_SUCCESS; + } + + /* Iterate through all the available components */ + + for (item = opal_list_get_first(&orte_odls_base.available_components); + item != opal_list_get_end(&orte_odls_base.available_components); + item = opal_list_get_next(item)) { + cli = (mca_base_component_list_item_t *) item; + component = (orte_odls_base_component_t *) cli->cli_component; + + /* Call the component's init function and see if it wants to be + selected */ + + module = component->init(&priority); + + /* If we got a non-NULL module back, then the component wants to + be selected. 
So save its multi/hidden values and save the + module with the highest priority */ + + if (NULL != module) { + /* If this is the best one, save it */ + + if (priority > best_priority) { + + /* If there was a previous best one, finalize */ + + if (NULL != best_component) { + best_component->finalize(); + } + + /* Save the new best one */ + + best_module = module; + best_component = component; + + /* update the best priority */ + best_priority = priority; + } + + /* If it's not the best one, finalize it */ + + else { + component->finalize(); + } + } + } + + /* If we didn't find one to select, then we have a big problem */ + + if (NULL == best_component) { + orte_odls_base.selected = false; + return ORTE_ERROR; + } + + /* We have happiness -- save the component and module for later + usage */ + + orte_odls = *best_module; + orte_odls_base.selected_component = *best_component; + orte_odls_base.selected = true; + + /* all done */ + + return ORTE_SUCCESS; +} diff --git a/orte/mca/odls/base/odls_private.h b/orte/mca/odls/base/odls_private.h new file mode 100644 index 0000000000..bf68239cb4 --- /dev/null +++ b/orte/mca/odls/base/odls_private.h @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +/** @file: + */ + +#ifndef MCA_ODLS_PRIVATE_H +#define MCA_ODLS_PRIVATE_H + +/* + * includes + */ +#include "orte_config.h" + +#include "opal/class/opal_list.h" + +#include "orte/dss/dss_types.h" +#include "orte/mca/ns/ns_types.h" +#include "orte/mca/rmgr/rmgr_types.h" + +#include "orte/mca/odls/odls_types.h" + +#if defined(c_plusplus) || defined(__cplusplus) +extern "C" { +#endif + +/* + * General ODLS types + */ +#if defined(c_plusplus) || defined(__cplusplus) +extern "C" { +#endif + +typedef struct orte_odls_globals_t { + /** Verbose/debug output stream */ + int output; + /** Time to allow process to forcibly die */ + int timeout_before_sigkill; +} orte_odls_globals_t; + +extern orte_odls_globals_t orte_odls_globals; + +/* + * data type functions + */ + +int orte_odls_compare_daemon_cmd(orte_daemon_cmd_flag_t *value1, orte_daemon_cmd_flag_t *value2, orte_data_type_t type); + +int orte_odls_copy_daemon_cmd(orte_daemon_cmd_flag_t **dest, orte_daemon_cmd_flag_t *src, orte_data_type_t type); + +int orte_odls_pack_daemon_cmd(orte_buffer_t *buffer, void *src, + orte_std_cntr_t num_vals, orte_data_type_t type); + +int orte_odls_print_daemon_cmd(char **output, char *prefix, orte_daemon_cmd_flag_t *src, orte_data_type_t type); + +void orte_odls_std_release(orte_data_value_t *value); + +int orte_odls_size_daemon_cmd(size_t *size, orte_daemon_cmd_flag_t *src, orte_data_type_t type); + +int orte_odls_unpack_daemon_cmd(orte_buffer_t *buffer, void *dest, + orte_std_cntr_t *num_vals, orte_data_type_t type); + +#if defined(c_plusplus) || defined(__cplusplus) +} +#endif +#endif diff --git a/orte/mca/pls/bproc_orted/Makefile.am b/orte/mca/odls/bproc/Makefile.am similarity index 63% rename from orte/mca/pls/bproc_orted/Makefile.am rename to orte/mca/odls/bproc/Makefile.am index a927c5df40..c2e2ad1168 100644 --- a/orte/mca/pls/bproc_orted/Makefile.am +++ b/orte/mca/odls/bproc/Makefile.am @@ -9,6 +9,7 @@ 
# University of Stuttgart. All rights reserved. # Copyright (c) 2004-2005 The Regents of the University of California. # All rights reserved. +# Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -16,37 +17,35 @@ # $HEADER$ # - - -AM_CPPFLAGS = -I$(top_ompi_builddir)/src/include $(pls_bproc_orted_CPPFLAGS) +AM_CPPFLAGS = -I$(top_ompi_builddir)/src/include $(odls_bproc_CPPFLAGS) # Make the output library in this directory, and name it either # mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la # (for static builds). -if OMPI_BUILD_pls_bproc_orted_DSO +if OMPI_BUILD_odls_bproc_DSO component_noinst = -component_install = mca_pls_bproc_orted.la +component_install = mca_odls_bproc.la else -component_noinst = libmca_pls_bproc_orted.la +component_noinst = libmca_odls_bproc.la component_install = endif sources = \ - pls_bproc_orted.h \ - pls_bproc_orted.c \ - pls_bproc_orted_component.c + odls_bproc.h \ + odls_bproc.c \ + odls_bproc_component.c mcacomponentdir = $(libdir)/openmpi mcacomponent_LTLIBRARIES = $(component_install) -mca_pls_bproc_orted_la_SOURCES = $(sources) -mca_pls_bproc_orted_la_LIBADD = \ - $(pls_bproc_orted_LIBS) \ +mca_odls_bproc_la_SOURCES = $(sources) +mca_odls_bproc_la_LIBADD = \ + $(odls_bproc_LIBS) \ $(top_ompi_builddir)/orte/liborte.la \ $(top_ompi_builddir)/opal/libopal.la -mca_pls_bproc_orted_la_LDFLAGS = -module -avoid-version $(pls_bproc_orted_LDFLAGS) - +mca_odls_bproc_la_LDFLAGS = -module -avoid-version $(odls_bproc_LDFLAGS) + noinst_LTLIBRARIES = $(component_noinst) -libmca_pls_bproc_orted_la_SOURCES = $(sources) -libmca_pls_bproc_orted_la_LIBADD = $(pls_bproc_orted_LIBS) -libmca_pls_bproc_orted_la_LDFLAGS = -module -avoid-version $(pls_bproc_orted_LDFLAGS) +libmca_odls_bproc_la_SOURCES = $(sources) +libmca_odls_bproc_la_LIBADD = $(odls_bproc_LIBS) +libmca_odls_bproc_la_LDFLAGS = -module -avoid-version $(odls_bproc_LDFLAGS) diff --git a/orte/mca/odls/bproc/configure.m4
b/orte/mca/odls/bproc/configure.m4 new file mode 100644 index 0000000000..c042c4689d --- /dev/null +++ b/orte/mca/odls/bproc/configure.m4 @@ -0,0 +1,38 @@ +# -*- shell-script -*- +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# MCA_odls_bproc_CONFIG([action-if-found], [action-if-not-found]) +# ----------------------------------------------------------- +AC_DEFUN([MCA_odls_bproc_CONFIG],[ + OMPI_CHECK_BPROC([odls_bproc], [odls_bproc_good=1], + [odls_bproc_good=1], [odls_bproc_good=0]) + + # if check worked, set wrapper flags if so. + # Evaluate succeed / fail + AS_IF([test "$odls_bproc_good" = "1"], + [odls_bproc_WRAPPER_EXTRA_LDFLAGS="$odls_bproc_LDFLAGS" + odls_bproc_WRAPPER_EXTRA_LIBS="$odls_bproc_LIBS" + $1], + [$2]) + + # set build flags to use in makefile + AC_SUBST([odls_bproc_CPPFLAGS]) + AC_SUBST([odls_bproc_LDFLAGS]) + AC_SUBST([odls_bproc_LIBS]) +])dnl diff --git a/orte/mca/odls/bproc/configure.params b/orte/mca/odls/bproc/configure.params new file mode 100644 index 0000000000..77969f612b --- /dev/null +++ b/orte/mca/odls/bproc/configure.params @@ -0,0 +1,23 @@ +# -*- shell-script -*- +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. 
+# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# Specific to this module + +PARAM_INIT_FILE=odls_bproc_component.c +PARAM_CONFIG_FILES="Makefile" diff --git a/orte/mca/pls/bproc_orted/pls_bproc_orted.c b/orte/mca/odls/bproc/odls_bproc.c similarity index 55% rename from orte/mca/pls/bproc_orted/pls_bproc_orted.c rename to orte/mca/odls/bproc/odls_bproc.c index 901606c676..b349d7b1a7 100644 --- a/orte/mca/pls/bproc_orted/pls_bproc_orted.c +++ b/orte/mca/odls/bproc/odls_bproc.c @@ -18,7 +18,7 @@ /** * @file: * Part of the bproc launcher. - * See pls_bproc_orted.h for an overview of how it works. + * See odls_bproc.h for an overview of how it works. */ #include "orte_config.h" #include @@ -44,35 +44,32 @@ #include "orte/mca/iof/base/iof_base_setup.h" #include "orte/mca/ns/base/base.h" #include "orte/mca/oob/base/base.h" -#include "orte/mca/pls/base/base.h" -#include "orte/mca/rmaps/base/rmaps_base_map.h" +#include "orte/mca/rml/rml.h" #include "orte/util/session_dir.h" #include "orte/util/univ_info.h" -#include "pls_bproc_orted.h" +#include "odls_bproc.h" /** * Initialization of the bproc_orted module with all the needed function pointers */ -orte_pls_base_module_1_0_0_t orte_pls_bproc_orted_module = { - orte_pls_bproc_orted_launch, - orte_pls_bproc_orted_terminate_job, - orte_pls_bproc_orted_terminate_proc, - orte_pls_bproc_orted_signal_job, - orte_pls_bproc_orted_signal_proc, - orte_pls_bproc_orted_finalize +orte_odls_base_module_t orte_odls_bproc_module = { + orte_odls_bproc_subscribe_launch_data, + orte_odls_bproc_launch_local_procs, + orte_odls_bproc_kill_local_procs, + orte_odls_bproc_signal_local_procs }; -static int pls_bproc_orted_make_dir(char *directory); -static char * pls_bproc_orted_get_base_dir_name(int proc_rank, 
orte_jobid_t jobid, - size_t app_context); -static void pls_bproc_orted_delete_dir_tree(char * path); -static int pls_bproc_orted_remove_dir(void); -static void pls_bproc_orted_send_cb(int status, orte_process_name_t * peer, +static int odls_bproc_make_dir(char *directory); +static char * odls_bproc_get_base_dir_name(int proc_rank, orte_jobid_t jobid, + orte_std_cntr_t app_context); +static void odls_bproc_delete_dir_tree(char * path); +static int odls_bproc_remove_dir(void); +static void odls_bproc_send_cb(int status, orte_process_name_t * peer, orte_buffer_t* buffer, int tag, void* cbdata); -static int pls_bproc_orted_setup_stdio(orte_process_name_t *proc_name, +static int odls_bproc_setup_stdio(orte_process_name_t *proc_name, int proc_rank, orte_jobid_t jobid, - size_t app_context, bool connect_stdin); + orte_std_cntr_t app_context, bool connect_stdin); /** @@ -83,13 +80,13 @@ static int pls_bproc_orted_setup_stdio(orte_process_name_t *proc_name, * @retval error */ static int -pls_bproc_orted_make_dir(char *directory) +odls_bproc_make_dir(char *directory) { struct stat buf; mode_t my_mode = S_IRWXU; /* at the least, I need to be able to do anything */ if (0 == stat(directory, &buf)) { /* exists - delete it and its contents */ - pls_bproc_orted_delete_dir_tree(directory); + odls_bproc_delete_dir_tree(directory); } /* try to create it with proper mode */ return(opal_os_dirpath_create(directory, my_mode)); @@ -108,8 +105,8 @@ pls_bproc_orted_make_dir(char *directory) * @retval path */ static char * - pls_bproc_orted_get_base_dir_name(int proc_rank, orte_jobid_t jobid, - size_t app_context) + odls_bproc_get_base_dir_name(int proc_rank, orte_jobid_t jobid, + orte_std_cntr_t app_context) { char *path = NULL, *user = NULL, *job = NULL; int rc; @@ -141,6 +138,9 @@ static char * ORTE_ERROR_LOG(ORTE_ERROR); path = NULL; } + if(0 < mca_odls_bproc_component.debug) { + opal_output(0, "odls bproc io setup. 
Path: %s\n", path); + } free(user); free(job); return path; @@ -152,7 +152,7 @@ static char * * @param path the path to the base directory to delete */ static void -pls_bproc_orted_delete_dir_tree(char * path) +odls_bproc_delete_dir_tree(char * path) { DIR *dp; struct dirent *ep; @@ -170,7 +170,7 @@ pls_bproc_orted_delete_dir_tree(char * path) filenm = opal_os_path(false, path, ep->d_name, NULL); ret = stat(filenm, &buf); if (ret < 0 || S_ISDIR(buf.st_mode)) { - pls_bproc_orted_delete_dir_tree(filenm); + odls_bproc_delete_dir_tree(filenm); free(filenm); continue; } @@ -190,7 +190,7 @@ pls_bproc_orted_delete_dir_tree(char * path) * @retval error */ static int -pls_bproc_orted_remove_dir() +odls_bproc_remove_dir() { char *frontend = NULL, *user = NULL, *filename = NULL; int id; @@ -213,7 +213,7 @@ pls_bproc_orted_remove_dir() return ORTE_ERROR; } /* we do our best to clean up the directory tree, but we ignore errors*/ - pls_bproc_orted_delete_dir_tree(frontend); + odls_bproc_delete_dir_tree(frontend); free(frontend); return ORTE_SUCCESS; } @@ -228,7 +228,7 @@ pls_bproc_orted_remove_dir() * @param cbdata */ static void -pls_bproc_orted_send_cb(int status, orte_process_name_t * peer, +odls_bproc_send_cb(int status, orte_process_name_t * peer, orte_buffer_t* buffer, int tag, void* cbdata) { OBJ_RELEASE(buffer); @@ -257,9 +257,9 @@ pls_bproc_orted_send_cb(int status, orte_process_name_t * peer, * @retval error */ static int -pls_bproc_orted_setup_stdio(orte_process_name_t *proc_name, int proc_rank, +odls_bproc_setup_stdio(orte_process_name_t *proc_name, int proc_rank, orte_jobid_t jobid, - size_t app_context, bool connect_stdin) + orte_std_cntr_t app_context, bool connect_stdin) { char *path_prefix, *fd_link_path = NULL; int rc = ORTE_SUCCESS, fd; @@ -269,7 +269,7 @@ pls_bproc_orted_setup_stdio(orte_process_name_t *proc_name, int proc_rank, struct termios term_attrs; #endif - path_prefix = pls_bproc_orted_get_base_dir_name(proc_rank, jobid, app_context); + path_prefix = 
odls_bproc_get_base_dir_name(proc_rank, jobid, (size_t)app_context); if (NULL == path_prefix) { rc = ORTE_ERROR; ORTE_ERROR_LOG(rc); @@ -277,7 +277,7 @@ pls_bproc_orted_setup_stdio(orte_process_name_t *proc_name, int proc_rank, } /* check for existence and access, or create it */ - if (ORTE_SUCCESS != (rc = pls_bproc_orted_make_dir(path_prefix))) { + if (ORTE_SUCCESS != (rc = odls_bproc_make_dir(path_prefix))) { ORTE_ERROR_LOG(rc); goto cleanup; } @@ -294,7 +294,7 @@ pls_bproc_orted_setup_stdio(orte_process_name_t *proc_name, int proc_rank, if (connect_stdin) { if (0 != mkfifo(fd_link_path, S_IRWXU)) { - perror("pls_bproc_orted mkfifo failed"); + perror("odls_bproc mkfifo failed"); rc = ORTE_ERROR; ORTE_ERROR_LOG(rc); goto cleanup; @@ -302,7 +302,7 @@ pls_bproc_orted_setup_stdio(orte_process_name_t *proc_name, int proc_rank, fd = open(fd_link_path, O_RDWR); if (-1 == fd) { - perror("pls_bproc_orted open failed"); + perror("odls_bproc open failed"); rc = ORTE_ERROR; ORTE_ERROR_LOG(rc); goto cleanup; @@ -312,7 +312,7 @@ pls_bproc_orted_setup_stdio(orte_process_name_t *proc_name, int proc_rank, ORTE_IOF_STDIN, fd); } else { if(0 != symlink("/dev/null", fd_link_path)) { - perror("pls_bproc_orted could not create symlink"); + perror("odls_bproc could not create symlink"); rc = ORTE_ERROR; ORTE_ERROR_LOG(rc); goto cleanup; @@ -332,7 +332,7 @@ pls_bproc_orted_setup_stdio(orte_process_name_t *proc_name, int proc_rank, #if defined(HAVE_OPENPTY) && (OMPI_ENABLE_PTY_SUPPORT != 0) if (0 != openpty(&amaster, &aslave, pty_name, NULL, NULL)) { - opal_output(0, "pls_bproc_orted: openpty failed, using pipes instead"); + opal_output(0, "odls_bproc: openpty failed, using pipes instead"); goto stdout_fifo_setup; } @@ -366,14 +366,14 @@ stdout_fifo_setup: #endif if (0 != mkfifo(fd_link_path, S_IRWXU)) { - perror("pls_bproc_orted mkfifo failed"); + perror("odls_bproc mkfifo failed"); rc = ORTE_ERROR; goto cleanup; } fd = open(fd_link_path, O_RDWR); if (-1 == fd) { - 
perror("pls_bproc_orted open failed"); + perror("odls_bproc open failed"); rc = ORTE_ERROR; goto cleanup; } @@ -395,14 +395,14 @@ stderr_fifo_setup: } if (0 != mkfifo(fd_link_path, S_IRWXU)) { - perror("pls_bproc_orted mkfifo failed"); + perror("odls_bproc mkfifo failed"); rc = ORTE_ERROR; goto cleanup; } fd = open(fd_link_path, O_RDWR); if (-1 == fd) { - perror("pls_bproc_orted open failed"); + perror("odls_bproc open failed"); rc = ORTE_ERROR; goto cleanup; } @@ -421,28 +421,118 @@ cleanup: } +/* this entire function gets called within a GPR compound command, + * so the subscription actually doesn't get done until the orted + * executes the compound command + */ +int orte_odls_bproc_subscribe_launch_data(orte_jobid_t job, orte_gpr_notify_cb_fn_t cbfunc) +{ + char *segment; + orte_gpr_value_t *values[1]; + orte_gpr_subscription_t *subs, sub=ORTE_GPR_SUBSCRIPTION_EMPTY; + orte_gpr_trigger_t *trigs, trig=ORTE_GPR_TRIGGER_EMPTY; + char* keys[] = { + ORTE_PROC_NAME_KEY, + ORTE_PROC_APP_CONTEXT_KEY, + ORTE_NODE_NAME_KEY, + }; + int num_keys = 3; + int i, rc; + + /* get the job segment name */ + if (ORTE_SUCCESS != (rc = orte_schema.get_job_segment_name(&segment, job))) { + ORTE_ERROR_LOG(rc); + return rc; + } + + /* attach ourselves to the "standard" orted trigger */ + if (ORTE_SUCCESS != + (rc = orte_schema.get_std_trigger_name(&(trig.name), + ORTED_LAUNCH_STAGE_GATE_TRIGGER, job))) { + ORTE_ERROR_LOG(rc); + free(segment); + return rc; + } + + /* ask for return of all data required for launching local processes */ + subs = &sub; + sub.action = ORTE_GPR_NOTIFY_DELETE_AFTER_TRIG; + if (ORTE_SUCCESS != (rc = orte_schema.get_std_subscription_name(&(sub.name), + ORTED_LAUNCH_STG_SUB, + job))) { + ORTE_ERROR_LOG(rc); + free(segment); + free(trig.name); + return rc; + } + sub.cnt = 1; + sub.values = values; + + if (ORTE_SUCCESS != (rc = orte_gpr.create_value(&(values[0]), ORTE_GPR_KEYS_OR | ORTE_GPR_TOKENS_OR, + segment, num_keys, 0))) { + ORTE_ERROR_LOG(rc); + free(segment); +
free(sub.name); + free(trig.name); + return rc; + } + for (i=0; i < num_keys; i++) { + if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(values[0]->keyvals[i]), + keys[i], ORTE_UNDEF, NULL))) { + ORTE_ERROR_LOG(rc); + free(segment); + free(sub.name); + free(trig.name); + OBJ_RELEASE(values[0]); + return rc; + } + } + + sub.cbfunc = cbfunc; + + trigs = &trig; + + /* do the subscription */ + if (ORTE_SUCCESS != (rc = orte_gpr.subscribe(1, &subs, 1, &trigs))) { + ORTE_ERROR_LOG(rc); + } + free(segment); + free(sub.name); + free(trig.name); + OBJ_RELEASE(values[0]); + + return rc; +} + /** * Setup io for the current node, then tell orterun we are ready for the actual * processes. - * @param jobid The jobid of the job to launch * @retval ORTE_SUCCESS * @retval error */ int -orte_pls_bproc_orted_launch(orte_jobid_t jobid) +orte_odls_bproc_launch_local_procs(orte_gpr_notify_data_t *data) { - opal_list_t map; - orte_rmaps_base_map_t * mapping; - orte_rmaps_base_proc_t * proc; + odls_bproc_child_t *child; opal_list_item_t* item; + orte_gpr_value_t *value, **values; + orte_gpr_keyval_t *kval; + char *node_name; int rc; - int num_procs = 0; - size_t i; + orte_std_cntr_t i, j, kv, kv2, *sptr; int src = 0; orte_buffer_t *ack; - char * param; bool connect_stdin; - char * pty_name = NULL; + orte_jobid_t jobid; + + /* first, retrieve the job number we are to launch from the + * returned data - we can extract the jobid directly from the + * subscription name we created + */ + if (ORTE_SUCCESS != (rc = orte_schema.extract_jobid_from_std_trigger_name(&jobid, data->target))) { + ORTE_ERROR_LOG(rc); + return rc; + } /** * hack for bproc4, change process group so that we do not receive signals @@ -451,55 +541,87 @@ orte_pls_bproc_orted_launch(orte_jobid_t jobid) */ setpgid(0,0); - /* get current node number */ - rc = bproc_currnode(); - if(0 > rc) { - opal_output(0, "pls_bproc_orted component running on invalid node"); - } - if(0 > asprintf(&param, "%d", rc)) { -
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - goto cleanup; - } - /* query the allocation for this node */ - OBJ_CONSTRUCT(&map, opal_list_t); - rc = orte_rmaps_base_get_node_map(orte_process_info.my_name->cellid, jobid, - param, &map); - free(param); - if (ORTE_SUCCESS != rc) { - ORTE_ERROR_LOG(rc); - goto cleanup; + /* loop through the returned data to find the global info and + * the info for processes going onto this node + */ + values = (orte_gpr_value_t**)(data->values)->addr; + for (j=0, i=0; i < data->cnt && j < (data->values)->size; j++) { /* loop through all returned values */ + if (NULL != values[j]) { + i++; + value = values[j]; + /* this must have come from one of the process containers, so it must + * contain data for a proc structure - see if it belongs to this node + */ + for (kv=0; kv < value->cnt; kv++) { + kval = value->keyvals[kv]; + if (strcmp(kval->key, ORTE_NODE_NAME_KEY) == 0) { + /* Most C-compilers will bark if we try to directly compare the string in the + * kval data area against a regular string, so we need to "get" the data + * so we can access it */ + if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&node_name, kval->value, ORTE_STRING))) { + ORTE_ERROR_LOG(rc); + return rc; + } + /* if this is our node...must also protect against a zero-length string */ + if (NULL != node_name && 0 == strcmp(node_name, orte_system_info.nodename)) { + /* ...harvest the info into a new child structure */ + child = OBJ_NEW(odls_bproc_child_t); + for (kv2 = 0; kv2 < value->cnt; kv2++) { + kval = value->keyvals[kv2]; + if(strcmp(kval->key, ORTE_PROC_NAME_KEY) == 0) { + /* copy the name into the child object */ + if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&(child->name), kval->value->data, ORTE_NAME))) { + ORTE_ERROR_LOG(rc); + return rc; + } + continue; + } + if(strcmp(kval->key, ORTE_PROC_APP_CONTEXT_KEY) == 0) { + if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&sptr, kval->value, ORTE_STD_CNTR))) { + ORTE_ERROR_LOG(rc); + return rc; + } + 
child->app_idx = *sptr; /* save the index into the app_context objects */ + continue; + } + } /* kv2 */ + /* protect operation on the global list of children */ + OPAL_THREAD_LOCK(&mca_odls_bproc_component.mutex); + opal_list_append(&mca_odls_bproc_component.children, &child->super); + opal_condition_signal(&mca_odls_bproc_component.cond); + OPAL_THREAD_UNLOCK(&mca_odls_bproc_component.mutex); + + } + } + } /* for kv */ + } /* for j */ } - /* figure out what processes will be on this node and set up the io files */ - for(item = opal_list_get_first(&map); - item != opal_list_get_end(&map); + /* set up the io files for our children */ + for(item = opal_list_get_first(&mca_odls_bproc_component.children); + item != opal_list_get_end(&mca_odls_bproc_component.children); item = opal_list_get_next(item)) { - mapping = (orte_rmaps_base_map_t *) item; - num_procs = 0; - for(i = mapping->num_procs; i > 0; i--) { - proc = mapping->procs[i - 1]; - if(0 < mca_pls_bproc_orted_component.debug) { - opal_output(0, "orte_pls_bproc_orted_launch: setting up io for " - "[%lu,%lu,%lu] proc rank %lu\n", - ORTE_NAME_ARGS((&proc->proc_name)), - proc->proc_rank); - } - /* only setup to forward stdin if it is rank 0, otherwise connect - * to /dev/null */ - if(0 == proc->proc_rank) { - connect_stdin = true; - } else { - connect_stdin = false; - } + child = (odls_bproc_child_t *) item; + if(0 < mca_odls_bproc_component.debug) { + opal_output(0, "orte_odls_bproc_launch: setting up io for " + "[%lu,%lu,%lu] proc rank %lu\n", + ORTE_NAME_ARGS((child->name)), + child->name->vpid); + } + /* only setup to forward stdin if it is rank 0, otherwise connect + * to /dev/null */ + if(0 == child->name->vpid) { + connect_stdin = true; + } else { + connect_stdin = false; + } - rc = pls_bproc_orted_setup_stdio(&proc->proc_name, num_procs, - jobid, mapping->app->idx, - connect_stdin); - if (ORTE_SUCCESS != rc) { - ORTE_ERROR_LOG(rc); - goto cleanup; - } - num_procs++; + rc = odls_bproc_setup_stdio(child->name, 
(int)child->name->vpid, + jobid, child->app_idx, + connect_stdin); + if (ORTE_SUCCESS != rc) { + ORTE_ERROR_LOG(rc); + goto cleanup; } } @@ -509,8 +631,8 @@ orte_pls_bproc_orted_launch(orte_jobid_t jobid) if(ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); } - rc = mca_oob_send_packed_nb(MCA_OOB_NAME_SEED, ack, MCA_OOB_TAG_BPROC, 0, - pls_bproc_orted_send_cb, NULL); + rc = mca_oob_send_packed_nb(ORTE_RML_NAME_SEED, ack, ORTE_RML_TAG_BPROC, 0, + odls_bproc_send_cb, NULL); if (0 > rc) { ORTE_ERROR_LOG(rc); goto cleanup; @@ -518,13 +640,7 @@ orte_pls_bproc_orted_launch(orte_jobid_t jobid) rc = ORTE_SUCCESS; cleanup: - while(NULL != (item = opal_list_remove_first(&map))) { - OBJ_RELEASE(item); - } - if(NULL != pty_name) { - free(pty_name); - } - OBJ_DESTRUCT(&map); + return rc; } @@ -532,37 +648,8 @@ cleanup: * Function to terminate a job. Since this component only runs on remote nodes * and doesn't actually launch any processes, this function is not needed * so is a noop. - * @param jobid The job to terminate - * @retval ORTE_SUCCESS */ -int orte_pls_bproc_orted_terminate_job(orte_jobid_t jobid) -{ - orte_iof.iof_flush(); - return ORTE_SUCCESS; -} - -/** - * Function to terminate a process. Since this component only runs on remote nodes - * and doesn't actually launch any processes, this function is not needed - * so is a noop. - * @param proc the process's name - * @retval ORTE_SUCCESS - */ -int orte_pls_bproc_orted_terminate_proc(const orte_process_name_t* proc) -{ - orte_iof.iof_flush(); - return ORTE_SUCCESS; -} - -/** - * Function to signal a job. Since this component only runs on remote nodes - * and doesn't actually launch any processes, this function is not needed - * so is a noop. 
- * @param jobid The job to signal - * @param signal The signal to send - * @retval ORTE_SUCCESS - */ -int orte_pls_bproc_orted_signal_job(orte_jobid_t jobid, int32_t signal) +int orte_odls_bproc_kill_local_procs(orte_jobid_t job, bool set_state) { orte_iof.iof_flush(); return ORTE_SUCCESS; @@ -576,7 +663,7 @@ int orte_pls_bproc_orted_signal_job(orte_jobid_t jobid, int32_t signal) * @param signal The signal to send * @retval ORTE_SUCCESS */ -int orte_pls_bproc_orted_signal_proc(const orte_process_name_t* proc, int32_t signal) +int orte_odls_bproc_signal_local_procs(orte_process_name_t* proc, int32_t signal) { orte_iof.iof_flush(); return ORTE_SUCCESS; @@ -584,14 +671,14 @@ int orte_pls_bproc_orted_signal_proc(const orte_process_name_t* proc, int32_t si /** - * Finalizes the bproc_orted module. Cleanup tmp directory/files + * Finalizes the bproc module. Cleanup tmp directory/files * used for I/O forwarding. * @retval ORTE_SUCCESS */ -int orte_pls_bproc_orted_finalize(void) +int orte_odls_bproc_finalize(void) { orte_iof.iof_flush(); - pls_bproc_orted_remove_dir(); + odls_bproc_remove_dir(); orte_session_dir_finalize(orte_process_info.my_name); return ORTE_SUCCESS; } diff --git a/orte/mca/odls/bproc/odls_bproc.h b/orte/mca/odls/bproc/odls_bproc.h new file mode 100644 index 0000000000..26228e0715 --- /dev/null +++ b/orte/mca/odls/bproc/odls_bproc.h @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +/** + * @file: + * Part of the bproc launching system. This launching system is broken into 2 + * parts: one runs under the PLS on the head node to launch the orteds, and the + * other serves as the orted's local launcher. + * + * The main job of this component is to setup ptys/pipes for IO forwarding. + * See pls_bproc.h for an overview of how the entire bproc launching system works. + */ +#ifndef ORTE_ODLS_BPROC_H_ +#define ORTE_ODLS_BPROC_H_ + +#include "orte_config.h" + +#include + +#include "opal/mca/mca.h" +#include "opal/threads/condition.h" + +#include "orte/mca/odls/odls.h" + +#if defined(c_plusplus) || defined(__cplusplus) +extern "C" { +#endif + +/* + * Module open / close + */ +int orte_odls_bproc_component_open(void); +int orte_odls_bproc_component_close(void); +int orte_odls_bproc_finalize(void); +orte_odls_base_module_t* orte_odls_bproc_init(int *priority); + +/* + * Startup / Shutdown + */ +int orte_odls_bproc_finalize(void); + +/* + * Interface + */ +int orte_odls_bproc_subscribe_launch_data(orte_jobid_t job, orte_gpr_notify_cb_fn_t cbfunc); +int orte_odls_bproc_launch_local_procs(orte_gpr_notify_data_t *data); +int orte_odls_bproc_kill_local_procs(orte_jobid_t job, bool set_state); +int orte_odls_bproc_signal_local_procs(orte_process_name_t* proc_name, int32_t signal); + +/** + * ODLS bproc_orted component + */ +struct orte_odls_bproc_component_t { + orte_odls_base_component_t super; + /**< The base class */ + int debug; + /**< If greater than 0 print debugging information */ + int priority; + /**< The priority of this component. 
This will be returned if we determine + * that bproc is available and running on this node, */ + opal_mutex_t lock; + /**< Lock used to prevent some race conditions */ + opal_condition_t cond; + /**< Condition used to wake up waiting threads */ + opal_list_t children; + /**< list of children on this node */ +}; +/** + * Convenience typedef + */ +typedef struct orte_odls_bproc_component_t orte_odls_bproc_component_t; + +/* + * List object to locally store the process names and pids of + * our children. This can subsequently be used to order termination + * or pass signals without looking the info up again. + */ +typedef struct odls_bproc_child_t { + opal_list_item_t super; /* required to place this on a list */ + orte_process_name_t *name; /* the OpenRTE name of the proc */ + pid_t pid; /* local pid of the proc */ + orte_std_cntr_t app_idx; /* index of the app_context for this proc */ + bool alive; /* is this proc alive? */ +} odls_bproc_child_t; +ORTE_DECLSPEC OBJ_CLASS_DECLARATION(odls_bproc_child_t); + +ORTE_DECLSPEC orte_odls_bproc_component_t mca_odls_bproc_component; +ORTE_DECLSPEC orte_odls_base_module_t orte_odls_bproc_module; + +#if defined(c_plusplus) || defined(__cplusplus) +} +#endif +#endif /* ORTE_ODLS_BPROC_H_ */ + diff --git a/orte/mca/pls/bproc_orted/pls_bproc_orted_component.c b/orte/mca/odls/bproc/odls_bproc_component.c similarity index 51% rename from orte/mca/pls/bproc_orted/pls_bproc_orted_component.c rename to orte/mca/odls/bproc/odls_bproc_component.c index dae301d053..2fc08b20af 100644 --- a/orte/mca/pls/bproc_orted/pls_bproc_orted_component.c +++ b/orte/mca/odls/bproc/odls_bproc_component.c @@ -21,32 +21,51 @@ * Takes care of the component stuff for the MCA. 
*/ #include "orte_config.h" -#include "opal/mca/base/mca_base_param.h" #include "orte/orte_constants.h" -#include "orte/mca/pls/pls.h" + +#include "opal/mca/base/mca_base_param.h" + #include "orte/util/proc_info.h" -#include "pls_bproc_orted.h" + +#include "orte/mca/odls/odls.h" +#include "odls_bproc.h" + +/* instance the child list object */ +static void odls_bproc_child_constructor(odls_bproc_child_t *ptr) +{ + ptr->name = NULL; + ptr->app_idx = -1; + ptr->alive = false; +} +static void odls_bproc_child_destructor(odls_bproc_child_t *ptr) +{ + if (NULL != ptr->name) free(ptr->name); +} +OBJ_CLASS_INSTANCE(odls_bproc_child_t, + opal_list_item_t, + odls_bproc_child_constructor, + odls_bproc_child_destructor); /** - * The bproc_orted component data structure used to store all the relevent data + * The bproc component data structure used to store all the relevent data * about this component. */ -orte_pls_bproc_orted_component_t mca_pls_bproc_orted_component = { +orte_odls_bproc_component_t mca_odls_bproc_component = { { /* First, the mca_component_t struct containing meta information about the component itself */ { - /* Indicate that we are a pls v1.0.0 component (which also + /* Indicate that we are a odls v1.3.0 component (which also implies a specific MCA version) */ - ORTE_PLS_BASE_VERSION_1_0_0, + ORTE_ODLS_BASE_VERSION_1_3_0, /* Component name and version */ - "bproc_orted", + "bproc", ORTE_MAJOR_VERSION, ORTE_MINOR_VERSION, ORTE_RELEASE_VERSION, /* Component open and close functions */ - orte_pls_bproc_orted_component_open, - orte_pls_bproc_orted_component_close + orte_odls_bproc_component_open, + orte_odls_bproc_component_close }, /* Next the MCA v1.0.0 component meta data */ { @@ -54,7 +73,8 @@ orte_pls_bproc_orted_component_t mca_pls_bproc_orted_component = { false }, /* Initialization / querying functions */ - orte_pls_bproc_orted_init + orte_odls_bproc_init, + orte_odls_bproc_finalize } }; @@ -62,18 +82,20 @@ orte_pls_bproc_orted_component_t 
mca_pls_bproc_orted_component = { * Opens the pls_bproc component, setting all the needed mca parameters and * finishes setting up the component struct. */ -int orte_pls_bproc_orted_component_open(void) +int orte_odls_bproc_component_open(void) { /* initialize globals */ - OBJ_CONSTRUCT(&mca_pls_bproc_orted_component.lock, opal_mutex_t); + OBJ_CONSTRUCT(&mca_odls_bproc_component.lock, opal_mutex_t); + OBJ_CONSTRUCT(&mca_odls_bproc_component.cond, opal_condition_t); + OBJ_CONSTRUCT(&mca_odls_bproc_component.children, opal_list_t); /* lookup parameters */ - mca_base_param_reg_int(&mca_pls_bproc_orted_component.super.pls_version, + mca_base_param_reg_int(&mca_odls_bproc_component.super.version, "priority", NULL, false, false, 100, - &mca_pls_bproc_orted_component.priority); - mca_base_param_reg_int(&mca_pls_bproc_orted_component.super.pls_version, + &mca_odls_bproc_component.priority); + mca_base_param_reg_int(&mca_odls_bproc_component.super.version, "debug", "If > 0 prints library debugging information", - false, false, 0, &mca_pls_bproc_orted_component.debug); + false, false, 0, &mca_odls_bproc_component.debug); return ORTE_SUCCESS; } @@ -81,16 +103,16 @@ int orte_pls_bproc_orted_component_open(void) * Initializes the module. We do not want to run unless we are not the seed, * bproc is running, and we are not on the master node. 
*/ -orte_pls_base_module_t *orte_pls_bproc_orted_init(int *priority) +orte_odls_base_module_t *orte_odls_bproc_init(int *priority) { int ret; struct bproc_version_t version; - /* are we the seed */ - if(orte_process_info.seed == true) - return NULL; - - /* okay, we are in a daemon - now check to see if BProc is running here */ + /* the base open/select logic protects us against operation when + * we are NOT in a daemon, so we don't have to check that here + */ + + /* check to see if BProc is running here */ ret = bproc_version(&version); if (ret != 0) { return NULL; @@ -101,16 +123,29 @@ orte_pls_base_module_t *orte_pls_bproc_orted_init(int *priority) return NULL; } - *priority = mca_pls_bproc_orted_component.priority; - return &orte_pls_bproc_orted_module; + *priority = mca_odls_bproc_component.priority; + return &orte_odls_bproc_module; } /** * Component close function. */ -int orte_pls_bproc_orted_component_close(void) +int orte_odls_bproc_component_close(void) { - OBJ_DESTRUCT(&mca_pls_bproc_orted_component.lock); + OBJ_DESTRUCT(&mca_odls_bproc_component.lock); + OBJ_DESTRUCT(&mca_odls_bproc_component.cond); + OBJ_DESTRUCT(&mca_odls_bproc_component.children); return ORTE_SUCCESS; } +int orte_odls_bproc_component_finalize(void) +{ + opal_list_item_t *item; + + /* cleanup state */ + while (NULL != (item = opal_list_remove_first(&mca_odls_bproc_component.children))) { + OBJ_RELEASE(item); + } + + return ORTE_SUCCESS; +} diff --git a/orte/mca/odls/default/Makefile.am b/orte/mca/odls/default/Makefile.am new file mode 100644 index 0000000000..9f01dffe17 --- /dev/null +++ b/orte/mca/odls/default/Makefile.am @@ -0,0 +1,48 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. 
+# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +dist_pkgdata_DATA = help-odls-default.txt + +sources = \ + odls_default.h \ + odls_default_component.c \ + odls_default_module.c + +# Make the output library in this directory, and name it either +# mca__.la (for DSO builds) or libmca__.la +# (for static builds). + +if OMPI_BUILD_odls_default_DSO +component_noinst = +component_install = mca_odls_default.la +else +component_noinst = libmca_odls_default.la +component_install = +endif + +mcacomponentdir = $(libdir)/openmpi +mcacomponent_LTLIBRARIES = $(component_install) +mca_odls_default_la_SOURCES = $(sources) +mca_odls_default_la_LDFLAGS = -module -avoid-version +mca_odls_default_la_LIBADD = \ + $(top_ompi_builddir)/orte/liborte.la \ + $(top_ompi_builddir)/opal/libopal.la + +noinst_LTLIBRARIES = $(component_noinst) +libmca_odls_default_la_SOURCES =$(sources) +libmca_odls_default_la_LDFLAGS = -module -avoid-version diff --git a/orte/mca/pls/fork/configure.m4 b/orte/mca/odls/default/configure.m4 similarity index 88% rename from orte/mca/pls/fork/configure.m4 rename to orte/mca/odls/default/configure.m4 index 510993a9ea..fbfacfe3f7 100644 --- a/orte/mca/pls/fork/configure.m4 +++ b/orte/mca/odls/default/configure.m4 @@ -17,8 +17,9 @@ # $HEADER$ # -# MCA_pls_fork_CONFIG([action-if-found], [action-if-not-found]) +# MCA_odls_default_CONFIG([action-if-found], [action-if-not-found]) # ----------------------------------------------------------- -AC_DEFUN([MCA_pls_fork_CONFIG],[ +AC_DEFUN([MCA_odls_default_CONFIG],[ AC_CHECK_FUNC([fork], [$1], [$2]) ])dnl + diff --git a/orte/mca/pls/fork/configure.params b/orte/mca/odls/default/configure.params similarity index 94% rename from orte/mca/pls/fork/configure.params rename to 
orte/mca/odls/default/configure.params index bb8b415163..28850ed237 100644 --- a/orte/mca/pls/fork/configure.params +++ b/orte/mca/odls/default/configure.params @@ -17,5 +17,5 @@ # $HEADER$ # -PARAM_INIT_FILE=pls_fork_component.c +PARAM_INIT_FILE=odls_default_component.c PARAM_CONFIG_FILES="Makefile" diff --git a/orte/mca/pls/fork/help-orte-pls-fork.txt b/orte/mca/odls/default/help-odls-default.txt similarity index 70% rename from orte/mca/pls/fork/help-orte-pls-fork.txt rename to orte/mca/odls/default/help-odls-default.txt index c87e506bae..ba9c0e5b9d 100644 --- a/orte/mca/pls/fork/help-orte-pls-fork.txt +++ b/orte/mca/odls/default/help-odls-default.txt @@ -16,36 +16,23 @@ # # $HEADER$ # -# This is the US/English general help file for Open RTE's orterun. +# This is the US/English general help file for Open RTE's orted launcher. # -[orte-pls-fork:chdir-error] +[odls-default:chdir-error] Failed to change to the working directory: Host: %s Directory: %s The error returned was "%s". Execution will now abort. -[orte-pls-fork:argv0-not-found] -Failed to find the following executable: - -Host: %s -Executable: %s - -Cannot continue. -[orte-pls-fork:argv0-not-accessible] +[odls-default:argv0-not-accessible] Failed to find or execute the following executable: Host: %s Executable: %s Cannot continue. -[orte-pls-fork:execv-error] -Could not execute the executable "%s": %s - -This could mean that your PATH or executable name is wrong, or that you do not -have the necessary permissions. Please ensure that the executable is able to be -found and executed. -[orte-pls-fork:could-not-kill] +[odls-default:could-not-kill] WARNING: A process refused to die! Host: %s @@ -53,7 +40,7 @@ PID: %d This process may still be running and/or consuming resources. -[orte-pls-fork:could-not-kill] +[odls-default:could-not-send-kill] WARNING: A process refused the kill SIGTERM signal! This should never happen unless the application is changing the parent/child relationship permissions. 
diff --git a/orte/mca/odls/default/odls_default.h b/orte/mca/odls/default/odls_default.h new file mode 100644 index 0000000000..9eb27027c5 --- /dev/null +++ b/orte/mca/odls/default/odls_default.h @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +/** + * @file: + */ + +#ifndef ORTE_ODLS_H +#define ORTE_ODLS_H + +#include "orte_config.h" + +#include "opal/threads/mutex.h" +#include "opal/threads/condition.h" +#include "opal/mca/mca.h" + +#include "orte/mca/ns/ns_types.h" +#include "orte/mca/gpr/gpr_types.h" + +#include "orte/mca/odls/odls.h" + +#if defined(c_plusplus) || defined(__cplusplus) +extern "C" { +#endif + +/* + * Module open / close + */ +int orte_odls_default_component_open(void); +int orte_odls_default_component_close(void); +orte_odls_base_module_t* orte_odls_default_component_init(int *priority); + +/* + * Startup / Shutdown + */ +int orte_odls_default_finalize(void); + +/* + * Interface + */ +int orte_odls_default_subscribe_launch_data(orte_jobid_t job, orte_gpr_notify_cb_fn_t cbfunc); +int orte_odls_default_launch_local_procs(orte_gpr_notify_data_t *data); +int orte_odls_default_kill_local_procs(orte_jobid_t job, bool set_state); +int orte_odls_default_signal_local_procs(orte_process_name_t *proc, + int32_t signal); + +/** + * ODLS Default globals + */ +typedef struct orte_odls_default_globals_t { + opal_mutex_t mutex; + opal_condition_t cond; + opal_list_t children; +} orte_odls_default_globals_t; 
+ +extern orte_odls_default_globals_t orte_odls_default; + +/* + * List object to locally store the process names and pids of + * our children. This can subsequently be used to order termination + * or pass signals without looking the info up again. + */ +typedef struct odls_default_child_t { + opal_list_item_t super; /* required to place this on a list */ + orte_process_name_t *name; /* the OpenRTE name of the proc */ + pid_t pid; /* local pid of the proc */ + orte_std_cntr_t app_idx; /* index of the app_context for this proc */ + bool alive; /* is this proc alive? */ +} odls_default_child_t; +ORTE_DECLSPEC OBJ_CLASS_DECLARATION(odls_default_child_t); + +/* + * List object to locally store app_contexts returned by the + * registry subscription. Since we don't know how many app_contexts will + * be returned, we need to store them on a list. + */ +typedef struct odls_default_app_context_t { + opal_list_item_t super; /* required to place this on a list */ + orte_app_context_t *app_context; +} odls_default_app_context_t; +ORTE_DECLSPEC OBJ_CLASS_DECLARATION(odls_default_app_context_t); + +/* + * ODLS Default module + */ +extern orte_odls_base_module_t orte_odls_default_module; + +#if defined(c_plusplus) || defined(__cplusplus) +} +#endif +#endif /* ORTE_ODLS_H */ diff --git a/orte/mca/odls/default/odls_default_component.c b/orte/mca/odls/default/odls_default_component.c new file mode 100644 index 0000000000..c161b9bd46 --- /dev/null +++ b/orte/mca/odls/default/odls_default_component.c @@ -0,0 +1,163 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. 
+ * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + * These symbols are in a file by themselves to provide nice linker + * semantics. Since linkers generally pull in symbols by object + * files, keeping these symbols as the only symbols in this file + * prevents utility programs such as "ompi_info" from having to import + * entire components just to query their version and parameters. + */ + +#include "orte_config.h" +#include "orte/orte_constants.h" + +#include +#ifdef HAVE_UNISTD_H +#include +#endif +#include + +#include "opal/util/argv.h" +#include "opal/util/path.h" +#include "opal/util/basename.h" +#include "opal/util/show_help.h" +#include "opal/mca/base/mca_base_param.h" + +#include "orte/mca/errmgr/errmgr.h" +#include "orte/mca/rml/rml.h" + +#include "orte/mca/odls/odls.h" +#include "orte/mca/pls/base/pls_private.h" + +#include "orte/mca/odls/default/odls_default.h" + +/* Instantiate the component globals */ +orte_odls_default_globals_t orte_odls_default; + + +/* instance the child list object */ +static void odls_default_child_constructor(odls_default_child_t *ptr) +{ + ptr->name = NULL; + ptr->pid = 0; + ptr->app_idx = -1; + ptr->alive = false; +} +static void odls_default_child_destructor(odls_default_child_t *ptr) +{ + if (NULL != ptr->name) free(ptr->name); +} +OBJ_CLASS_INSTANCE(odls_default_child_t, + opal_list_item_t, + odls_default_child_constructor, + odls_default_child_destructor); + +/* instance the app_context list object */ +OBJ_CLASS_INSTANCE(odls_default_app_context_t, + opal_list_item_t, + NULL, NULL); + + +/* + * Instantiate the public struct with all of our public information + * and pointers to our public functions in it + */ + +orte_odls_base_component_t mca_odls_default_component = { + /* First, the mca_component_t struct containing meta information + about the component itself */ + { + /* Indicate that we are a 
odls v1.3.0 component (which also + implies a specific MCA version) */ + + ORTE_ODLS_BASE_VERSION_1_3_0, + /* Component name and version */ + + "default", + ORTE_MAJOR_VERSION, + ORTE_MINOR_VERSION, + ORTE_RELEASE_VERSION, + + /* Component open and close functions */ + + orte_odls_default_component_open, + orte_odls_default_component_close + }, + + /* Next the MCA v1.0.0 component meta data */ + + { + /* Whether the component is checkpointable or not */ + + true + }, + + /* Initialization / querying functions */ + + orte_odls_default_component_init, + orte_odls_default_finalize +}; + + + +int orte_odls_default_component_open(void) +{ + /* initialize globals */ + OBJ_CONSTRUCT(&orte_odls_default.mutex, opal_mutex_t); + OBJ_CONSTRUCT(&orte_odls_default.cond, opal_condition_t); + OBJ_CONSTRUCT(&orte_odls_default.children, opal_list_t); + + return ORTE_SUCCESS; +} + + +orte_odls_base_module_t *orte_odls_default_component_init(int *priority) +{ + /* the base open/select logic protects us against operation when + * we are NOT in a daemon, so we don't have to check that here + */ + + /* we have built some logic into the configure.m4 file that checks + * to see if we have "fork" support and only builds this component + * if we do. 
Hence, we only get here if we CAN build - in which + * case, we definitely should be considered for selection + */ + *priority = 1; /* let others override us - we are the default */ + + return &orte_odls_default_module; +} + + +int orte_odls_default_component_close(void) +{ + OBJ_DESTRUCT(&orte_odls_default.mutex); + OBJ_DESTRUCT(&orte_odls_default.cond); + OBJ_DESTRUCT(&orte_odls_default.children); + return ORTE_SUCCESS; +} + +int orte_odls_default_finalize(void) +{ + opal_list_item_t *item; + + /* cleanup state */ + while (NULL != (item = opal_list_remove_first(&orte_odls_default.children))) { + OBJ_RELEASE(item); + } + + return ORTE_SUCCESS; +} diff --git a/orte/mca/odls/default/odls_default_module.c b/orte/mca/odls/default/odls_default_module.c new file mode 100644 index 0000000000..043dbda2f9 --- /dev/null +++ b/orte/mca/odls/default/odls_default_module.c @@ -0,0 +1,1037 @@ +/* + * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + * These symbols are in a file by themselves to provide nice linker + * semantics. Since linkers generally pull in symbols by object + * files, keeping these symbols as the only symbols in this file + * prevents utility programs such as "ompi_info" from having to import + * entire components just to query their version and parameters. 
+ */ + +#include "orte_config.h" +#include "orte/orte_constants.h" + +#include +#ifdef HAVE_UNISTD_H +#include +#endif +#include +#include +#ifdef HAVE_SYS_WAIT_H +#include +#endif +#include +#ifdef HAVE_FCNTL_H +#include +#endif +#include +#ifdef HAVE_SYS_PARAM_H +#include +#endif +#ifdef HAVE_NETDB_H +#include +#endif +#ifdef HAVE_SYS_STAT_H +#include +#endif /* HAVE_SYS_STAT_H */ + +#include "opal/event/event.h" +#include "opal/util/argv.h" +#include "opal/util/output.h" +#include "opal/util/os_path.h" +#include "opal/util/show_help.h" +#include "opal/util/path.h" +#include "opal/util/basename.h" +#include "opal/util/opal_environ.h" +#include "opal/mca/base/mca_base_param.h" +#include "opal/mca/paffinity/base/base.h" + +#include "orte/dss/dss.h" +#include "orte/util/sys_info.h" +#include "orte/util/univ_info.h" +#include "orte/util/session_dir.h" +#include "orte/runtime/orte_wait.h" +#include "orte/mca/errmgr/errmgr.h" +#include "orte/mca/errmgr/base/base.h" +#include "orte/mca/iof/iof.h" +#include "orte/mca/iof/base/iof_base_setup.h" +#include "orte/mca/ns/ns.h" +#include "orte/mca/sds/base/base.h" +#include "orte/mca/rmgr/rmgr.h" +#include "orte/mca/rml/rml.h" +#include "orte/mca/gpr/gpr.h" +#include "orte/mca/rmaps/base/base.h" +#include "orte/mca/smr/smr.h" + +#include "orte/mca/odls/base/odls_private.h" +#include "orte/mca/odls/default/odls_default.h" + +extern char **environ; + +static void set_handler_default(int sig); + +orte_odls_base_module_t orte_odls_default_module = { + orte_odls_default_subscribe_launch_data, + orte_odls_default_launch_local_procs, + orte_odls_default_kill_local_procs, + orte_odls_default_signal_local_procs +}; + +/* this entire function gets called within a GPR compound command, + * so the subscription actually doesn't get done until the orted + * executes the compound command + */ +int orte_odls_default_subscribe_launch_data(orte_jobid_t job, orte_gpr_notify_cb_fn_t cbfunc) +{ + char *segment; + orte_gpr_value_t *values[2]; + 
orte_gpr_subscription_t *subs, sub=ORTE_GPR_SUBSCRIPTION_EMPTY; + orte_gpr_trigger_t *trigs, trig=ORTE_GPR_TRIGGER_EMPTY; + char *glob_keys[] = { + ORTE_JOB_APP_CONTEXT_KEY, + ORTE_JOB_VPID_START_KEY, + ORTE_JOB_VPID_RANGE_KEY + }; + int num_glob_keys = 3; + char* keys[] = { + ORTE_PROC_NAME_KEY, + ORTE_PROC_APP_CONTEXT_KEY, + ORTE_NODE_NAME_KEY, + }; + int num_keys = 3; + int i, rc; + + /* get the job segment name */ + if (ORTE_SUCCESS != (rc = orte_schema.get_job_segment_name(&segment, job))) { + ORTE_ERROR_LOG(rc); + return rc; + } + + /* attach ourselves to the "standard" orted trigger */ + if (ORTE_SUCCESS != + (rc = orte_schema.get_std_trigger_name(&(trig.name), + ORTED_LAUNCH_STAGE_GATE_TRIGGER, job))) { + ORTE_ERROR_LOG(rc); + free(segment); + return rc; + } + + /* ask for return of all data required for launching local processes */ + subs = &sub; + sub.action = ORTE_GPR_NOTIFY_DELETE_AFTER_TRIG; + if (ORTE_SUCCESS != (rc = orte_schema.get_std_subscription_name(&(sub.name), + ORTED_LAUNCH_STG_SUB, + job))) { + ORTE_ERROR_LOG(rc); + free(segment); + free(trig.name); + return rc; + } + sub.cnt = 2; + sub.values = values; + + if (ORTE_SUCCESS != (rc = orte_gpr.create_value(&(values[0]), ORTE_GPR_TOKENS_OR, segment, + num_glob_keys, 1))) { + ORTE_ERROR_LOG(rc); + free(segment); + free(sub.name); + free(trig.name); + return rc; + } + values[0]->tokens[0] = strdup(ORTE_JOB_GLOBALS); + for (i=0; i < num_glob_keys; i++) { + if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(values[0]->keyvals[i]), + glob_keys[i], ORTE_UNDEF, NULL))) { + ORTE_ERROR_LOG(rc); + free(segment); + free(sub.name); + free(trig.name); + OBJ_RELEASE(values[0]); + return rc; + } + } + + if (ORTE_SUCCESS != (rc = orte_gpr.create_value(&(values[1]), ORTE_GPR_KEYS_OR | ORTE_GPR_TOKENS_OR, + segment, num_keys, 0))) { + ORTE_ERROR_LOG(rc); + free(segment); + free(sub.name); + free(trig.name); + OBJ_RELEASE(values[0]); + return rc; + } + for (i=0; i < num_keys; i++) { + if (ORTE_SUCCESS != (rc =
orte_gpr.create_keyval(&(values[1]->keyvals[i]), + keys[i], ORTE_UNDEF, NULL))) { + ORTE_ERROR_LOG(rc); + free(segment); + free(sub.name); + free(trig.name); + OBJ_RELEASE(values[0]); + OBJ_RELEASE(values[1]); + return rc; + } + } + + sub.cbfunc = cbfunc; + + trigs = &trig; + + /* do the subscription */ + if (ORTE_SUCCESS != (rc = orte_gpr.subscribe(1, &subs, 1, &trigs))) { + ORTE_ERROR_LOG(rc); + } + free(segment); + free(sub.name); + free(trig.name); + OBJ_RELEASE(values[0]); + OBJ_RELEASE(values[1]); + + return rc; +} + +static bool odls_default_child_died(pid_t pid, unsigned int timeout, int *exit_status) +{ + time_t end; + pid_t ret; + + end = time(NULL) + timeout; + do { + ret = waitpid(pid, exit_status, WNOHANG); + if (pid == ret) { + /* It died -- return success */ + return true; + } else if (-1 == ret && ECHILD == errno) { + /* The pid no longer exists, so we'll call this "good + enough for government work" */ + return true; + } + + /* Sleep for a second */ + sleep(1); + } while (time(NULL) < end); + + /* The child didn't die, so return false */ + return false; +} + +int orte_odls_default_kill_local_procs(orte_jobid_t job, bool set_state) +{ + odls_default_child_t *child; + opal_list_item_t *item; + int rc, exit_status; + opal_list_t procs_killed; + orte_namelist_t *proc; + + OBJ_CONSTRUCT(&procs_killed, opal_list_t); + + /* since we are going to be working with the global list of + * children, we need to protect that list from modification + * by other threads + */ + OPAL_THREAD_LOCK(&orte_odls_default.mutex); + + for (item = opal_list_get_first(&orte_odls_default.children); + item != opal_list_get_end(&orte_odls_default.children); + item = opal_list_get_next(item)) { + child = (odls_default_child_t*)item; + + /* is this process alive? if not, then nothing for us + * to do to it + */ + if (!child->alive) { + continue; + } + + /* do we have a child from the specified job? 
Because the + * job could be given as a WILDCARD value, we must use + * the dss.compare function to check for equality. + */ + if (ORTE_EQUAL != orte_dss.compare(&job, &(child->name->jobid), ORTE_JOBID)) { + continue; + } + + /* de-register the SIGCHILD callback for this pid */ + orte_wait_cb_cancel(child->pid); + + /* Send a sigterm to the process. If we get ESRCH back, that + means the process is already dead, so just move on. */ + if (0 != kill(child->pid, SIGTERM) && ESRCH != errno) { + int err = errno; + opal_show_help("help-odls-default.txt", + "odls-default:could-not-send-kill", + true, orte_system_info.nodename, child->pid, err); + goto MOVEON; + } + + /* The kill succeeded. Wait up to timeout_before_sigkill + seconds to see if it died. */ + + if (!odls_default_child_died(child->pid, orte_odls_globals.timeout_before_sigkill, &exit_status)) { + /* try killing it again */ + kill(child->pid, SIGKILL); + /* Double check that it actually died this time */ + if (!odls_default_child_died(child->pid, orte_odls_globals.timeout_before_sigkill, &exit_status)) { + opal_show_help("help-odls-default.txt", + "odls-default:could-not-kill", + true, orte_system_info.nodename, child->pid); + } + } + +MOVEON: + /* set the process to "not alive" */ + child->alive = false; + + /* add this proc to the local list */ + proc = OBJ_NEW(orte_namelist_t); + if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&(proc->name), child->name, ORTE_NAME))) { + ORTE_ERROR_LOG(rc); + opal_condition_signal(&orte_odls_default.cond); + OPAL_THREAD_UNLOCK(&orte_odls_default.mutex); + return rc; + } + opal_list_append(&procs_killed, &proc->item); + } + + /* we are done with the global list, so we can now release + * any waiting threads - this also allows any callbacks to work + */ + opal_condition_signal(&orte_odls_default.cond); + OPAL_THREAD_UNLOCK(&orte_odls_default.mutex); + + /* deconstruct the local list and update the process states on the registry, if indicated */ + while (NULL != (item = 
opal_list_remove_first(&procs_killed))) { + proc = (orte_namelist_t*)item; + if (set_state) { + if (ORTE_SUCCESS != (rc = orte_smr.set_proc_state(proc->name, ORTE_PROC_STATE_TERMINATED, exit_status))) { + ORTE_ERROR_LOG(rc); + /* don't exit out even if this didn't work - we still might need to kill more + * processes, so just keep trucking + */ + } + } + OBJ_RELEASE(proc); + } + + OBJ_DESTRUCT(&procs_killed); + + return ORTE_SUCCESS; +} + +/* + * Wait for a callback indicating the child has completed. + */ + +static void odls_default_wait_local_proc(pid_t pid, int status, void* cbdata) +{ + odls_default_child_t *child; + opal_list_item_t *item; + bool aborted; + char *job, *vpid, *abort_file; + struct stat buf; + int rc; + + /* since we are going to be working with the global list of + * children, we need to protect that list from modification + * by other threads. This will also be used to protect us + * from race conditions on any abort situation + */ + OPAL_THREAD_LOCK(&orte_odls_default.mutex); + + /* find this child */ + for (item = opal_list_get_first(&orte_odls_default.children); + item != opal_list_get_end(&orte_odls_default.children); + item = opal_list_get_next(item)) { + child = (odls_default_child_t*)item; + if (child->alive && pid == child->pid) { /* found it */ + goto GOTCHILD; + } + } + /* get here if we didn't find the child, or if the specified child is already + * dead. If the latter, then we have a problem as it means we are detecting + * it exiting multiple times + */ + ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); + opal_condition_signal(&orte_odls_default.cond); + OPAL_THREAD_UNLOCK(&orte_odls_default.mutex); + return; + +GOTCHILD: + orte_iof.iof_flush(); + + /* determine the state of this process */ + aborted = false; + if(WIFEXITED(status)) { + /* even though the process exited "normally", it is quite + * possible that this happened via an orte_abort call - in + * which case, we need to indicate this was an "abnormal" + * termination. 
See the note in "orte_abort.c" for + * an explanation of this process. + * + * For our purposes here, we need to check for the existence + * of an "abort" file in this process' session directory. If + * we find it, then we know that this was an abnormal termination. + */ + if (ORTE_SUCCESS != (rc = orte_ns.convert_jobid_to_string(&job, child->name->jobid))) { + ORTE_ERROR_LOG(rc); + goto MOVEON; + } + if (ORTE_SUCCESS != (rc = orte_ns.convert_vpid_to_string(&vpid, child->name->vpid))) { + ORTE_ERROR_LOG(rc); + free(job); + goto MOVEON; + } + abort_file = opal_os_path(false, orte_process_info.universe_session_dir, + job, vpid, "abort", NULL ); + free(job); + free(vpid); + if (0 == stat(abort_file, &buf)) { + /* the abort file must exist - there is nothing in it we need. It's + * meer existence indicates that an abnormal termination occurred + */ + aborted = true; + free(abort_file); + } + } else { + /* the process was terminated with a signal! That's definitely + * abnormal, so indicate that condition + */ + aborted = true; + } + +MOVEON: + /* set this proc to "not alive" */ + child->alive = false; + + /* Clean up the session directory as if we were the process + * itself. This covers the case where the process died abnormally + * and didn't cleanup its own session directory. 
+ */ + orte_session_dir_finalize(child->name); + + /* Need to unlock before we call set_proc_state as this is going to generate + * a trigger that will eventually callback to us + */ + opal_condition_signal(&orte_odls_default.cond); + OPAL_THREAD_UNLOCK(&orte_odls_default.mutex); + + if (aborted) { + rc = orte_smr.set_proc_state(child->name, ORTE_PROC_STATE_ABORTED, status); + } else { + rc = orte_smr.set_proc_state(child->name, ORTE_PROC_STATE_TERMINATED, status); + } + + if (ORTE_SUCCESS != rc) { + ORTE_ERROR_LOG(rc); + } +} + +/** + * Fork/exec the specified processes + */ + +static int odls_default_fork_local_proc( + orte_app_context_t* context, + odls_default_child_t *child, + orte_vpid_t vpid_start, + orte_vpid_t vpid_range, + bool want_processor, + size_t processor) +{ + pid_t pid; + orte_iof_base_io_conf_t opts; + int rc; + sigset_t sigs; + int i = 0, p[2]; + + /* should pull this information from MPIRUN instead of going with + default */ + opts.usepty = OMPI_ENABLE_PTY_SUPPORT; + + /* BWB - Fix post beta. Should setup stdin in orterun and + make part of the app_context */ + if (child->name->vpid == 0) { + opts.connect_stdin = true; + } else { + opts.connect_stdin = false; + } + + rc = orte_iof_base_setup_prefork(&opts); + if (ORTE_SUCCESS != rc) { + ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); + return ORTE_ERR_OUT_OF_RESOURCE; + } + + /* A pipe is used to communicate between the parent and child to + indicate whether the exec ultimately succeeded or failed. The + child sets the pipe to be close-on-exec; the child only ever + writes anything to the pipe if there is an error (e.g., + executable not found, exec() fails, etc.). The parent does a + blocking read on the pipe; if the pipe closed with no data, + then the exec() succeeded. If the parent reads something from + the pipe, then the child was letting us know that it failed.
*/ + if (pipe(p) < 0) { + ORTE_ERROR_LOG(ORTE_ERR_IN_ERRNO); + return ORTE_ERR_IN_ERRNO; + } + + /* Fork off the child */ + pid = fork(); + if(pid < 0) { + ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); + return ORTE_ERR_OUT_OF_RESOURCE; + } + + if (pid == 0) { + char *param, *param2; + char *uri; + char **environ_copy; + long fd, fdmax = sysconf(_SC_OPEN_MAX); + + /* Setup the pipe to be close-on-exec */ + close(p[0]); + fcntl(p[1], F_SETFD, FD_CLOEXEC); + + /* setup stdout/stderr so that any error messages that we may + print out will get displayed back at orterun */ + orte_iof_base_setup_child(&opts); + + /* Try to change to the context cwd and check that the app + exists and is executable */ + if (ORTE_SUCCESS != (i = orte_rmgr.check_context_cwd(context, true))) { + opal_show_help("help-odls-default.txt", + "odls-default:chdir-error", + true, orte_system_info.nodename, context->cwd); + /* Tell the parent that Badness happened */ + write(p[1], &i, sizeof(int)); + exit(-1); + } + if (ORTE_SUCCESS != (i = orte_rmgr.check_context_app(context))) { + opal_show_help("help-odls-default.txt", + "odls-default:argv0-not-accessible", + true, orte_system_info.nodename, context->app); + /* Tell the parent that Badness happened */ + write(p[1], &i, sizeof(int)); + exit(-1); + } + + /* setup base environment: copy the current environ and merge + in the app context environ */ + if (NULL != context->env) { + environ_copy = opal_environ_merge(environ, context->env); + } else { + environ_copy = opal_argv_copy(environ); + } + + /* special case handling for --prefix: this is somewhat icky, + but at least some users do this. :-\ It is possible that + when using --prefix, the user will also "-x PATH" and/or + "-x LD_LIBRARY_PATH", which would therefore clobber the + work that was done in the prior pls to ensure that we have + the prefix at the beginning of the PATH and + LD_LIBRARY_PATH. So examine the context->env and see if we + find PATH or LD_LIBRARY_PATH. 
If found, that means the + prior work was clobbered, and we need to re-prefix those + variables. */ + for (i = 0; NULL != context->env && NULL != context->env[i]; ++i) { + char *newenv; + + /* Reset PATH */ + if (0 == strncmp("PATH=", context->env[i], 5)) { + asprintf(&newenv, "%s/bin:%s", + context->prefix_dir, context->env[i] + 5); + opal_setenv("PATH", newenv, true, &environ_copy); + free(newenv); + } + + /* Reset LD_LIBRARY_PATH */ + else if (0 == strncmp("LD_LIBRARY_PATH=", context->env[i], 16)) { + asprintf(&newenv, "%s/lib:%s", + context->prefix_dir, context->env[i] + 16); + opal_setenv("LD_LIBRARY_PATH", newenv, true, &environ_copy); + free(newenv); + } + } + + param = mca_base_param_environ_variable("rmgr","bootproxy","jobid"); + opal_unsetenv(param, &environ_copy); + free(param); + + /* Handle processor affinity */ + if (want_processor) { + param = mca_base_param_environ_variable("mpi", NULL, + "paffinity_processor"); + asprintf(¶m2, "%lu", (unsigned long) processor); + opal_setenv(param, param2, true, &environ_copy); + free(param); + free(param2); + } + + /* setup universe info */ + if (NULL != orte_universe_info.name) { + param = mca_base_param_environ_variable("universe", NULL, NULL); + asprintf(&uri, "%s@%s:%s", orte_universe_info.uid, + orte_universe_info.host, + orte_universe_info.name); + opal_setenv(param, uri, true, &environ_copy); + free(param); + free(uri); + } + + /* setup ns contact info */ + if(NULL != orte_process_info.ns_replica_uri) { + uri = strdup(orte_process_info.ns_replica_uri); + } else { + uri = orte_rml.get_uri(); + } + param = mca_base_param_environ_variable("ns","replica","uri"); + opal_setenv(param, uri, true, &environ_copy); + free(param); + free(uri); + + /* setup gpr contact info */ + if(NULL != orte_process_info.gpr_replica_uri) { + uri = strdup(orte_process_info.gpr_replica_uri); + } else { + uri = orte_rml.get_uri(); + } + param = mca_base_param_environ_variable("gpr","replica","uri"); + opal_setenv(param, uri, true, 
&environ_copy); + free(param); + free(uri); + + /* use same nodename as the starting daemon (us) */ + param = mca_base_param_environ_variable("orte", "base", "nodename"); + opal_setenv(param, orte_system_info.nodename, true, &environ_copy); + free(param); + + /* push name into environment */ + orte_ns_nds_env_put(child->name, vpid_start, vpid_range, + &environ_copy); + + /* close all file descriptors w/ exception of stdin/stdout/stderr */ + for(fd=3; fdargv == NULL) { + context->argv = malloc(sizeof(char*)*2); + context->argv[0] = strdup(context->app); + context->argv[1] = NULL; + } + + /* Set signal handlers back to the default. Do this close to + the exev() because the event library may (and likely will) + reset them. If we don't do this, the event library may + have left some set that, at least on some OS's, don't get + reset via fork() or exec(). Hence, the launched process + could be unkillable (for example). */ + + set_handler_default(SIGTERM); + set_handler_default(SIGINT); + set_handler_default(SIGHUP); + set_handler_default(SIGPIPE); + set_handler_default(SIGCHLD); + + /* Unblock all signals, for many of the same reasons that we + set the default handlers, above. This is noticable on + Linux where the event library blocks SIGTERM, but we don't + want that blocked by the launched process. 
*/ + sigprocmask(0, 0, &sigs); + sigprocmask(SIG_UNBLOCK, &sigs, 0); + + /* Exec the new executable */ + + execve(context->app, context->argv, environ_copy); + opal_show_help("help-orted-launcer.txt", "orted-launcher:execv-error", + true, context->app, strerror(errno)); + exit(-1); + } else { + + /* connect endpoints IOF */ + rc = orte_iof_base_setup_parent(child->name, &opts); + if(ORTE_SUCCESS != rc) { + ORTE_ERROR_LOG(rc); + return rc; + } + + /* Wait to read something from the pipe or close */ + close(p[1]); + while (1) { + rc = read(p[0], &i, sizeof(int)); + if (rc < 0) { + /* Signal interrupts are ok */ + if (errno == EINTR) { + continue; + } + /* Other errno's are bad */ + return ORTE_ERR_IN_ERRNO; + break; + } else if (0 == rc) { + /* Child was successful in exec'ing! */ + break; + } else { + /* Doh -- child failed. + Report the failure to launch this process through + the SOH or else everyone else will hang. Don't bother + checking whether or not this worked - just fire and forget + */ + ORTE_ERROR_LOG(i); + orte_smr.set_proc_state(child->name, ORTE_PROC_STATE_ABORTED, rc); + return ORTE_ERR_FATAL; + break; + } + } + + /* set the proc state to LAUNCHED and increment that counter so the trigger can fire + */ + if (ORTE_SUCCESS != + (rc = orte_smr.set_proc_state(child->name, ORTE_PROC_STATE_LAUNCHED, 0))) { + ORTE_ERROR_LOG(rc); + return rc; + } + + /* save the pid and indicate we've been launched */ + child->pid = pid; + child->alive = true; + + /* wait for the child process - dont register for wait + * callback until after I/O is setup and the pid registered - + * otherwise can receive the wait callback before the above is + * ever completed + */ + orte_wait_cb(pid, odls_default_wait_local_proc, NULL); + } + return ORTE_SUCCESS; +} + + +/** + * Launch all processes allocated to the current node. 
+ */ + +int orte_odls_default_launch_local_procs(orte_gpr_notify_data_t *data) +{ + int rc; + orte_std_cntr_t i, j, kv, kv2, *sptr; + orte_gpr_value_t *value, **values; + orte_gpr_keyval_t *kval; + orte_app_context_t *app; + orte_jobid_t job; + orte_vpid_t *vptr, start, range; + char *node_name; + opal_list_t app_context_list; + odls_default_child_t *child; + odls_default_app_context_t *app_item; + size_t num_processors; + bool want_processor; + opal_list_item_t *item, *item2; + + /* parse the returned data to create the required structures + * for a fork launch. Since the data will contain information + * on procs for ALL nodes, we first have to find the value + * struct that contains info for our node. + */ + + /* first, retrieve the job number we are to launch from the + * returned data - we can extract the jobid directly from the + * subscription name we created + */ + if (ORTE_SUCCESS != (rc = orte_schema.extract_jobid_from_std_trigger_name(&job, data->target))) { + ORTE_ERROR_LOG(rc); + return rc; + } + /* We need to create a list of the app_contexts + * so we can know what to launch - the process info only gives + * us an index into the app_context array, not the app_context + * info itself. + */ + + OBJ_CONSTRUCT(&app_context_list, opal_list_t); + + values = (orte_gpr_value_t**)(data->values)->addr; + for (j=0, i=0; i < data->cnt && j < (data->values)->size; j++) { /* loop through all returned values */ + if (NULL != values[j]) { + i++; + value = values[j]; + + if (0 == strcmp(value->tokens[0], ORTE_JOB_GLOBALS)) { + /* this came from the globals container, so it must contain + * the app_context(s), vpid_start, and vpid_range entries. 
Only one + * value object should ever come from that container + */ + for (kv=0; kv < value->cnt; kv++) { + kval = value->keyvals[kv]; + if (strcmp(kval->key, ORTE_JOB_VPID_START_KEY) == 0) { + /* this can only occur once, so just store it */ + if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&vptr, kval->value, ORTE_VPID))) { + ORTE_ERROR_LOG(rc); + return rc; + } + start = *vptr; + continue; + } + if (strcmp(kval->key, ORTE_JOB_VPID_RANGE_KEY) == 0) { + /* this can only occur once, so just store it */ + if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&vptr, kval->value, ORTE_VPID))) { + ORTE_ERROR_LOG(rc); + return rc; + } + range = *vptr; + continue; + } + if (strcmp(kval->key, ORTE_JOB_APP_CONTEXT_KEY) == 0) { + /* this can occur multiple times since we allow multiple + * app_contexts on the orterun command line. Add them + * to the list + */ + if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&app, kval->value, ORTE_APP_CONTEXT))) { + ORTE_ERROR_LOG(rc); + return rc; + } + app_item = OBJ_NEW(odls_default_app_context_t); + if (NULL == app_item) { + ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); + return ORTE_ERR_OUT_OF_RESOURCE; + } + app_item->app_context = app; + opal_list_append(&app_context_list, &app_item->super); + kval->value->data = NULL; /* protect the data storage from later release */ + } + } /* end for loop to process global data */ + } else { + /* this must have come from one of the process containers, so it must + * contain data for a proc structure - see if it + * belongs to this node + */ + for (kv=0; kv < value->cnt; kv++) { + kval = value->keyvals[kv]; + if (strcmp(kval->key, ORTE_NODE_NAME_KEY) == 0) { + /* Most C-compilers will bark if we try to directly compare the string in the + * kval data area against a regular string, so we need to "get" the data + * so we can access it */ + if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&node_name, kval->value, ORTE_STRING))) { + ORTE_ERROR_LOG(rc); + return rc; + } + /* if this is our node...must also protect 
against a zero-length string */ + if (NULL != node_name && 0 == strcmp(node_name, orte_system_info.nodename)) { + /* ...harvest the info into a new child structure */ + child = OBJ_NEW(odls_default_child_t); + for (kv2 = 0; kv2 < value->cnt; kv2++) { + kval = value->keyvals[kv2]; + if(strcmp(kval->key, ORTE_PROC_NAME_KEY) == 0) { + /* copy the name into the child object */ + if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&(child->name), kval->value->data, ORTE_NAME))) { + ORTE_ERROR_LOG(rc); + return rc; + } + continue; + } + if(strcmp(kval->key, ORTE_PROC_APP_CONTEXT_KEY) == 0) { + if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&sptr, kval->value, ORTE_STD_CNTR))) { + ORTE_ERROR_LOG(rc); + return rc; + } + child->app_idx = *sptr; /* save the index into the app_context objects */ + continue; + } + } /* kv2 */ + /* protect operation on the global list of children */ + OPAL_THREAD_LOCK(&orte_odls_default.mutex); + opal_list_append(&orte_odls_default.children, &child->super); + opal_condition_signal(&orte_odls_default.cond); + OPAL_THREAD_UNLOCK(&orte_odls_default.mutex); + + } + } + } /* for kv */ + } + } /* for j */ + } + + /* determine if we are oversubscribed */ + want_processor = true; /* default to taking it for ourselves */ + opal_paffinity_base_get_num_processors(&rc); + num_processors = (size_t)rc; + if (opal_list_get_size(&orte_odls_default.children) > num_processors) { /* oversubscribed */ + want_processor = false; + } + + /* okay, now let's launch our local procs using a fork/exec */ + i = 0; + /* protect operations involving the global list of children */ + OPAL_THREAD_LOCK(&orte_odls_default.mutex); + + for (item = opal_list_get_first(&orte_odls_default.children); + item != opal_list_get_end(&orte_odls_default.children); + item = opal_list_get_next(item)) { + child = (odls_default_child_t*)item; + + /* is this child already alive? This can happen if + * we are asked to launch additional processes. 
+ * If it has been launched, then do nothing + */ + if (child->alive) { + continue; + } + + /* do we have a child from the specified job. Because the + * job could be given as a WILDCARD value, we must use + * the dss.compare function to check for equality. + */ + if (ORTE_EQUAL != orte_dss.compare(&job, &(child->name->jobid), ORTE_JOBID)) { + continue; + } + + /* find the indicated app_context in the list */ + for (item2 = opal_list_get_first(&app_context_list); + item2 != opal_list_get_end(&app_context_list); + item2 = opal_list_get_next(item2)) { + app_item = (odls_default_app_context_t*)item2; + if (child->app_idx == app_item->app_context->idx) { + app = app_item->app_context; + goto DOFORK; + } + } + /* get here if we couldn't find the app_context */ + ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); + opal_condition_signal(&orte_odls_default.cond); + OPAL_THREAD_UNLOCK(&orte_odls_default.mutex); + return ORTE_ERR_NOT_FOUND; + +DOFORK: + /* must unlock prior to fork to keep things clean in the + * event library + */ + opal_condition_signal(&orte_odls_default.cond); + OPAL_THREAD_UNLOCK(&orte_odls_default.mutex); + + if (ORTE_SUCCESS != (rc = odls_default_fork_local_proc(app, child, start, + range, want_processor, i))) { + ORTE_ERROR_LOG(rc); + orte_smr.set_proc_state(child->name, ORTE_PROC_STATE_ABORTED, 0); + opal_condition_signal(&orte_odls_default.cond); + OPAL_THREAD_UNLOCK(&orte_odls_default.mutex); + return rc; + } + i++; + } + + /* cleanup */ + while (NULL != (item = opal_list_remove_first(&app_context_list))) { + OBJ_RELEASE(item); + } + OBJ_DESTRUCT(&app_context_list); + + opal_condition_signal(&orte_odls_default.cond); + OPAL_THREAD_UNLOCK(&orte_odls_default.mutex); + return ORTE_SUCCESS; +} + + +/** + * Pass a signal to my local procs + */ + +static int send_signal(pid_t pid, int signal) +{ + int rc = ORTE_SUCCESS; + + if (kill(pid, signal) != 0) { + switch(errno) { + case EINVAL: + ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); + rc = ORTE_ERR_BAD_PARAM; + break; + case 
ESRCH: + ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); + rc = ORTE_ERR_NOT_FOUND; + break; + case EPERM: + ORTE_ERROR_LOG(ORTE_ERR_PERM); + rc = ORTE_ERR_PERM; + break; + default: + ORTE_ERROR_LOG(ORTE_ERROR); + rc = ORTE_ERROR; + } + } + + return rc; +} +int orte_odls_default_signal_local_procs(orte_process_name_t *proc, int32_t signal) +{ + int rc; + opal_list_item_t *item; + odls_default_child_t *child; + + /* protect operations involving the global list of children */ + OPAL_THREAD_LOCK(&orte_odls_default.mutex); + + /* if procs is NULL, then we want to signal all + * of the local procs, so just do that case + */ + if (NULL == proc) { + for (item = opal_list_get_first(&orte_odls_default.children); + item != opal_list_get_end(&orte_odls_default.children); + item = opal_list_get_next(item)) { + child = (odls_default_child_t*)item; + if (ORTE_SUCCESS != (rc = send_signal(child->pid, (int)signal))) { + ORTE_ERROR_LOG(rc); + } + } + opal_condition_signal(&orte_odls_default.cond); + OPAL_THREAD_UNLOCK(&orte_odls_default.mutex); + return rc; + } + + /* we want it sent to some specified process, so find it */ + for (item = opal_list_get_first(&orte_odls_default.children); + item != opal_list_get_end(&orte_odls_default.children); + item = opal_list_get_next(item)) { + child = (odls_default_child_t*)item; + if (ORTE_EQUAL == orte_dss.compare(&(child->name), proc, ORTE_NAME)) { + /* unlock before signaling as this may generate a callback */ + opal_condition_signal(&orte_odls_default.cond); + OPAL_THREAD_UNLOCK(&orte_odls_default.mutex); + if (ORTE_SUCCESS != (rc = send_signal(child->pid, (int)signal))) { + ORTE_ERROR_LOG(rc); + } + return rc; + } + } + + /* only way to get here is if we couldn't find the specified proc. 
+ * report that as an error and return it + */ + ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); + opal_condition_signal(&orte_odls_default.cond); + OPAL_THREAD_UNLOCK(&orte_odls_default.mutex); + return ORTE_ERR_NOT_FOUND; +} + + +static void set_handler_default(int sig) +{ + struct sigaction act; + + act.sa_handler = SIG_DFL; + act.sa_flags = 0; + sigemptyset(&act.sa_mask); + + sigaction(sig, &act, (struct sigaction *)0); +} diff --git a/orte/mca/odls/odls.h b/orte/mca/odls/odls.h new file mode 100644 index 0000000000..7d6ca8b219 --- /dev/null +++ b/orte/mca/odls/odls.h @@ -0,0 +1,130 @@ +/* -*- C -*- + * + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +/** + * @file + * + * The OpenRTE Daemon's Local Launch Subsystem + * + */ + +#ifndef ORTE_MCA_ODLS_H +#define ORTE_MCA_ODLS_H + +#include "orte_config.h" + +#include "opal/mca/mca.h" +#include "opal/class/opal_list.h" + +#include "orte/mca/gpr/gpr_types.h" +#include "orte/mca/ns/ns_types.h" + +#include "orte/mca/odls/odls_types.h" + +/* + * odls module functions + */ + +/** + * Subscribe to receive the launch data for local processes + */ +typedef int (*orte_odls_base_module_subscribe_launch_data_fn_t)(orte_jobid_t job, orte_gpr_notify_cb_fn_t cbfunc); + +/** + * Locally launch the provided processes + */ +typedef int (*orte_odls_base_module_launch_local_processes_fn_t)(orte_gpr_notify_data_t *data); + +/** + * Kill the local processes on this node + */ +typedef int (*orte_odls_base_module_kill_local_processes_fn_t)(orte_jobid_t job, bool set_state); + +/** + * Signal local processes + */ +typedef int (*orte_pls_base_module_signal_local_process_fn_t)(orte_process_name_t *proc, + int32_t signal); + +/** + * pls module version 1.3.0 + */ +struct orte_odls_base_module_1_3_0_t { + orte_odls_base_module_subscribe_launch_data_fn_t subscribe_launch_data; + orte_odls_base_module_launch_local_processes_fn_t launch_local_procs; + orte_odls_base_module_kill_local_processes_fn_t kill_local_procs; + orte_pls_base_module_signal_local_process_fn_t signal_local_procs; +}; + +/** shorten orte_odls_base_module_1_3_0_t declaration */ +typedef struct orte_odls_base_module_1_3_0_t orte_odls_base_module_1_3_0_t; +/** shorten orte_odls_base_module_t declaration */ +typedef struct orte_odls_base_module_1_3_0_t orte_odls_base_module_t; + +/** + * odls initialization function + * + * Called by the MCA framework to initialize the component. Invoked + * exactly once per process. + * + * @param priority (OUT) Relative priority or ranking use by MCA to + * select a module. 
+ */ +typedef struct orte_odls_base_module_1_3_0_t* +(*orte_odls_base_component_init_fn_t)(int *priority); + +/** + * Cleanup all resources held by the component + */ +typedef int (*orte_odls_base_component_finalize_fn_t)(void); + + +/** + * odls component v1.3.0 + */ +struct orte_odls_base_component_1_3_0_t { + /** component version */ + mca_base_component_t version; + /** component data */ + mca_base_component_data_1_0_0_t odls_data; + /** Function called when component is initialized */ + orte_odls_base_component_init_fn_t init; + /* Function called when component is finalized */ + orte_odls_base_component_finalize_fn_t finalize; +}; +/** Convenience typedef */ +typedef struct orte_odls_base_component_1_3_0_t orte_odls_base_component_1_3_0_t; +/** Convenience typedef */ +typedef orte_odls_base_component_1_3_0_t orte_odls_base_component_t; + + +/** + * Macro for use in modules that are of type odls v1.3.0 + */ +#define ORTE_ODLS_BASE_VERSION_1_3_0 \ + /* odls v1.3 is chained to MCA v1.0 */ \ + MCA_BASE_VERSION_1_0_0, \ + /* odls v1.3 */ \ + "odls", 1, 3, 0 + +/* Global structure for accessing ODLS functions +*/ +ORTE_DECLSPEC extern orte_odls_base_module_t orte_odls; /* holds selected module's function pointers */ + + +#endif /* MCA_ODLS_H */ diff --git a/orte/mca/odls/odls_types.h b/orte/mca/odls/odls_types.h new file mode 100644 index 0000000000..9a158cb7fa --- /dev/null +++ b/orte/mca/odls/odls_types.h @@ -0,0 +1,51 @@ +/* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +/** @file: + */ + +#ifndef ORTE_MCA_ODLS_TYPES_H +#define ORTE_MCA_ODLS_TYPES_H + +#include "orte_config.h" +#include "orte/orte_types.h" + +#if defined(c_plusplus) || defined(__cplusplus) +extern "C" { +#endif + +/* define the orted command flag type */ +typedef uint8_t orte_daemon_cmd_flag_t; +#define ORTE_DAEMON_CMD_T ORTE_UINT8 + + +/* + * Definitions needed for communication + */ +#define ORTE_DAEMON_HOSTFILE_CMD (orte_daemon_cmd_flag_t) 1 +#define ORTE_DAEMON_SCRIPTFILE_CMD (orte_daemon_cmd_flag_t) 2 +#define ORTE_DAEMON_CONTACT_QUERY_CMD (orte_daemon_cmd_flag_t) 3 +#define ORTE_DAEMON_KILL_LOCAL_PROCS (orte_daemon_cmd_flag_t) 4 +#define ORTE_DAEMON_SIGNAL_LOCAL_PROCS (orte_daemon_cmd_flag_t) 5 +#define ORTE_DAEMON_ADD_LOCAL_PROCS (orte_daemon_cmd_flag_t) 6 +#define ORTE_DAEMON_HEARTBEAT_CMD (orte_daemon_cmd_flag_t) 254 +#define ORTE_DAEMON_EXIT_CMD (orte_daemon_cmd_flag_t) 255 + + +#if defined(c_plusplus) || defined(__cplusplus) +} +#endif +#endif diff --git a/orte/mca/odls/windows/.ompi_ignore b/orte/mca/odls/windows/.ompi_ignore new file mode 100644 index 0000000000..e69de29bb2 diff --git a/orte/mca/pls/process/Makefile.am b/orte/mca/odls/windows/Makefile.am old mode 100644 new mode 100755 similarity index 100% rename from orte/mca/pls/process/Makefile.am rename to orte/mca/odls/windows/Makefile.am diff --git a/orte/mca/pls/process/configure.m4 b/orte/mca/odls/windows/configure.m4 old mode 100644 new mode 100755 similarity index 78% rename from orte/mca/pls/process/configure.m4 rename to orte/mca/odls/windows/configure.m4 index 46d41a63cb..8711262c1d --- a/orte/mca/pls/process/configure.m4 +++ b/orte/mca/odls/windows/configure.m4 @@ -10,8 +10,8 @@ # $HEADER$ # -# MCA_pls_process_CONFIG([action-if-found], [action-if-not-found]) +# MCA_odls_windows_CONFIG([action-if-found], [action-if-not-found]) # ----------------------------------------------------------- 
-AC_DEFUN([MCA_pls_process_CONFIG],[ +AC_DEFUN([MCA_odls_windows_CONFIG],[ AC_CHECK_FUNC([CreateProcess], [$1], [$2]) ])dnl diff --git a/orte/mca/pls/process/configure.params b/orte/mca/odls/windows/configure.params old mode 100644 new mode 100755 similarity index 88% rename from orte/mca/pls/process/configure.params rename to orte/mca/odls/windows/configure.params index 435e28ad55..8c44ff140d --- a/orte/mca/pls/process/configure.params +++ b/orte/mca/odls/windows/configure.params @@ -10,5 +10,5 @@ # $HEADER$ # -PARAM_INIT_FILE=pls_process_component.c +PARAM_INIT_FILE=odls_windows_component.c PARAM_CONFIG_FILES="Makefile" diff --git a/orte/mca/pls/process/help-orte-pls-process.txt b/orte/mca/odls/windows/help-odls-windows.txt old mode 100644 new mode 100755 similarity index 100% rename from orte/mca/pls/process/help-orte-pls-process.txt rename to orte/mca/odls/windows/help-odls-windows.txt diff --git a/orte/mca/pls/process/pls_process.h b/orte/mca/odls/windows/odls_windows.h old mode 100644 new mode 100755 similarity index 100% rename from orte/mca/pls/process/pls_process.h rename to orte/mca/odls/windows/odls_windows.h diff --git a/orte/mca/pls/process/pls_process_component.c b/orte/mca/odls/windows/odls_windows_component.c old mode 100644 new mode 100755 similarity index 100% rename from orte/mca/pls/process/pls_process_component.c rename to orte/mca/odls/windows/odls_windows_component.c diff --git a/orte/mca/pls/process/pls_process_module.c b/orte/mca/odls/windows/odls_windows_module.c old mode 100644 new mode 100755 similarity index 100% rename from orte/mca/pls/process/pls_process_module.c rename to orte/mca/odls/windows/odls_windows_module.c diff --git a/orte/mca/oob/base/oob_base_xcast.c b/orte/mca/oob/base/oob_base_xcast.c index 4433191cfc..e3d5836226 100644 --- a/orte/mca/oob/base/oob_base_xcast.c +++ b/orte/mca/oob/base/oob_base_xcast.c @@ -91,7 +91,7 @@ int mca_oob_xcast( { orte_std_cntr_t i; int rc; - int tag = MCA_OOB_TAG_XCAST; + int tag = 
ORTE_RML_TAG_XCAST; int status; orte_proc_state_t state; diff --git a/orte/mca/oob/oob_types.h b/orte/mca/oob/oob_types.h index df46e4a870..f0e6d85e76 100644 --- a/orte/mca/oob/oob_types.h +++ b/orte/mca/oob/oob_types.h @@ -32,23 +32,6 @@ * Other constants */ -/** - * Service tags - */ -#define MCA_OOB_TAG_NS (orte_rml_tag_t) 1 -#define MCA_OOB_TAG_GPR (orte_rml_tag_t) 2 -#define MCA_OOB_TAG_GPR_NOTIFY (orte_rml_tag_t) 3 -#define MCA_OOB_TAG_RTE (orte_rml_tag_t) 4 -#define MCA_OOB_TAG_EXEC (orte_rml_tag_t) 5 -#define MCA_OOB_TAG_DAEMON (orte_rml_tag_t) 6 -#define MCA_OOB_TAG_STDIO (orte_rml_tag_t) 7 -#define MCA_OOB_TAG_SCHED (orte_rml_tag_t) 8 -#define MCA_OOB_TAG_PCM_KILL (orte_rml_tag_t) 9 -#define MCA_OOB_TAG_XCAST (orte_rml_tag_t) 10 -#define MCA_OOB_TAG_PCM_KILL_ACK (orte_rml_tag_t) 11 -#define MCA_OOB_TAG_BPROC (orte_rml_tag_t) 12 -#define ORTE_OOB_TAG_START_LIST (orte_rml_tag_t) 100 /* starting point for tag server assignments */ - /** * The wildcard for receives from any peer. */ diff --git a/orte/mca/oob/tcp/oob_tcp.c b/orte/mca/oob/tcp/oob_tcp.c index ea14b13a68..b2e6f5ccce 100644 --- a/orte/mca/oob/tcp/oob_tcp.c +++ b/orte/mca/oob/tcp/oob_tcp.c @@ -83,6 +83,7 @@ OBJ_CLASS_INSTANCE( */ static int mca_oob_tcp_create_listen(void); +static int mca_oob_tcp_create_listen_thread(void); static void mca_oob_tcp_recv_handler(int sd, short flags, void* user); static void mca_oob_tcp_accept(void); @@ -100,6 +101,12 @@ OBJ_CLASS_INSTANCE( NULL, NULL); +OBJ_CLASS_INSTANCE( + mca_oob_tcp_pending_connection_t, + opal_free_list_item_t, + NULL, + NULL); + /* @@ -169,6 +176,9 @@ static inline char* mca_oob_tcp_param_register_str( */ int mca_oob_tcp_component_open(void) { + char *listen_type; + int tmp; + #ifdef __WINDOWS__ WSADATA win_sock_data; if (WSAStartup(MAKEWORD(2,2), &win_sock_data) != 0) { @@ -190,6 +200,12 @@ int mca_oob_tcp_component_open(void) OBJ_CONSTRUCT(&mca_oob_tcp_component.tcp_msg_completed, opal_list_t); 
OBJ_CONSTRUCT(&mca_oob_tcp_component.tcp_match_lock, opal_mutex_t); OBJ_CONSTRUCT(&mca_oob_tcp_component.tcp_match_cond, opal_condition_t); + OBJ_CONSTRUCT(&mca_oob_tcp_component.tcp_listen_thread, opal_thread_t); + OBJ_CONSTRUCT(&mca_oob_tcp_component.tcp_pending_connections_fl, opal_free_list_t); + OBJ_CONSTRUCT(&mca_oob_tcp_component.tcp_pending_connections, opal_list_t); + OBJ_CONSTRUCT(&mca_oob_tcp_component.tcp_copy_out_connections, opal_list_t); + OBJ_CONSTRUCT(&mca_oob_tcp_component.tcp_copy_in_connections, opal_list_t); + OBJ_CONSTRUCT(&mca_oob_tcp_component.tcp_pending_connections_lock, opal_mutex_t); /* register oob module parameters */ mca_oob_tcp_component.tcp_peer_limit = @@ -207,9 +223,60 @@ int mca_oob_tcp_component_open(void) mca_oob_tcp_component.tcp_rcvbuf = mca_oob_tcp_param_register_int("rcvbuf", 128*1024); + mca_base_param_reg_string(&mca_oob_tcp_component.super.oob_base, + "listen_mode", + "Mode for HNP to accept incoming connections: event, listen_thread", + false, + false, + "event", + &listen_type); + + if ((0 == strcmp(listen_type, "event")) || NULL == getenv("I_AM_MPIRUN")) { + mca_oob_tcp_component.tcp_listen_type = OOB_TCP_EVENT; + } else if (0 == strcmp(listen_type, "listen_thread")) { + mca_oob_tcp_component.tcp_listen_type = OOB_TCP_LISTEN_THREAD; + } else { + opal_output(0, "Invalid value for oob_tcp_listen_mode parameter: %s", + listen_type); + return ORTE_ERROR; + } + + mca_base_param_reg_int(&mca_oob_tcp_component.super.oob_base, + "listen_thread_max_queue", + "High water mark for queued accepted socket list size", + false, + false, + 10, + &mca_oob_tcp_component.tcp_copy_max_size); + + mca_base_param_reg_int(&mca_oob_tcp_component.super.oob_base, + "listen_thread_max_time", + "Maximum amount of time (in milliseconds) to wait between processing accepted socket list", + false, + false, + 10, + &tmp); + +#if OPAL_TIMER_USEC_NATIVE + mca_oob_tcp_component.tcp_copy_delta = tmp * 1000; +#else + mca_oob_tcp_component.tcp_copy_delta = 
tmp * + opal_timer_base_get_freq() / 1000; +#endif + + mca_base_param_reg_int(&mca_oob_tcp_component.super.oob_base, + "accept_spin_count", + "Number of times to let accept return EWOULDBLOCK before updating accepted socket list", + false, + false, + 10, + &mca_oob_tcp_component.tcp_copy_spin_count); + /* initialize state */ + mca_oob_tcp_component.tcp_shutdown = false; mca_oob_tcp_component.tcp_listen_sd = -1; mca_oob_tcp_component.tcp_match_count = 0; + mca_oob_tcp_component.tcp_last_copy_time = 0; return ORTE_SUCCESS; } @@ -251,7 +318,7 @@ int mca_oob_tcp_component_close(void) static void mca_oob_tcp_accept(void) { while(true) { - ompi_socklen_t addrlen = sizeof(struct sockaddr_in); + opal_socklen_t addrlen = sizeof(struct sockaddr_in); struct sockaddr_in addr; mca_oob_tcp_event_t* event; int sd; @@ -291,7 +358,7 @@ static int mca_oob_tcp_create_listen(void) { int flags; struct sockaddr_in inaddr; - ompi_socklen_t addrlen; + opal_socklen_t addrlen; /* create a listen socket for incoming connections */ mca_oob_tcp_component.tcp_listen_sd = socket(AF_INET, SOCK_STREAM, 0); @@ -352,6 +419,206 @@ static int mca_oob_tcp_create_listen(void) } +static void* mca_oob_tcp_listen_thread(opal_object_t *obj) +{ + int rc, count; + opal_socklen_t addrlen = sizeof(struct sockaddr_in); + opal_free_list_item_t *fl_item; + mca_oob_tcp_pending_connection_t *item; + struct timeval timeout; + fd_set readfds; + + while (false == mca_oob_tcp_component.tcp_shutdown) { + count = 0; + + FD_ZERO(&readfds); + FD_SET(mca_oob_tcp_component.tcp_listen_sd, &readfds); + timeout.tv_sec = 0; + timeout.tv_usec = 10000; + + rc = select(mca_oob_tcp_component.tcp_listen_sd + 1, &readfds, + NULL, NULL, &timeout); + if (rc < 0) { + if (EAGAIN != opal_socket_errno && EINTR != opal_socket_errno) { + perror("select"); + } + continue; + } + + while (count < mca_oob_tcp_component.tcp_copy_spin_count && + opal_list_get_size(&mca_oob_tcp_component.tcp_copy_in_connections) < + (size_t) 
mca_oob_tcp_component.tcp_copy_max_size) { + OPAL_FREE_LIST_WAIT(&mca_oob_tcp_component.tcp_pending_connections_fl, + fl_item, rc); + item = (mca_oob_tcp_pending_connection_t*) fl_item; + item->fd = accept(mca_oob_tcp_component.tcp_listen_sd, + (struct sockaddr*)&(item->addr), &addrlen); + if(item->fd < 0) { + OPAL_FREE_LIST_RETURN(&mca_oob_tcp_component.tcp_pending_connections_fl, + fl_item); + + if (mca_oob_tcp_component.tcp_shutdown) return NULL; + + if(opal_socket_errno != EAGAIN || opal_socket_errno != EWOULDBLOCK) { + opal_output(0, "mca_oob_tcp_accept: accept() failed with errno %d.", opal_socket_errno); + close(item->fd); + return NULL; + } + + count++; + continue; + } + + if(mca_oob_tcp_component.tcp_debug) { + opal_output(0, "[%lu,%lu,%lu] mca_oob_tcp_listen_thread: (%d, %d) %s:%d\n", + ORTE_NAME_ARGS(orte_process_info.my_name), + item->fd, opal_socket_errno, + inet_ntoa(item->addr.sin_addr), + item->addr.sin_port); + } + + opal_list_append(&mca_oob_tcp_component.tcp_copy_in_connections, + (opal_list_item_t*) item); + } + + if (0 < opal_list_get_size(&mca_oob_tcp_component.tcp_copy_in_connections)) { + opal_mutex_lock(&mca_oob_tcp_component.tcp_pending_connections_lock); + opal_list_join(&mca_oob_tcp_component.tcp_pending_connections, + opal_list_get_end(&mca_oob_tcp_component.tcp_pending_connections), + &mca_oob_tcp_component.tcp_copy_in_connections); + opal_mutex_unlock(&mca_oob_tcp_component.tcp_pending_connections_lock); + } + } + + return NULL; +} + +/* called from opal_progress() to create the oob contact information + for the file descriptors accepted() by the accept thread. 
*/
+static int mca_oob_tcp_listen_progress(void)
+{
+    int count = 0;
+    mca_oob_tcp_pending_connection_t *item;
+    mca_oob_tcp_event_t* event;
+#if OPAL_TIMER_USEC_NATIVE
+    opal_timer_t now = opal_timer_base_get_usec();
+#else
+    opal_timer_t now = opal_timer_base_get_cycles();
+#endif /* OPAL_TIMER_USEC_NATIVE */
+
+    /* if we've not pulled pending connections for a while OR we've
+       hit the high water mark of pending connections, grab all the
+       pending connections */
+    if ((now - mca_oob_tcp_component.tcp_last_copy_time >
+         mca_oob_tcp_component.tcp_copy_delta) ||
+        ((size_t) mca_oob_tcp_component.tcp_copy_max_size <
+         opal_list_get_size(&mca_oob_tcp_component.tcp_pending_connections))) {
+
+        /* move the pending connections off the list the accept thread
+           is inserting into, onto a temporary list for us to process
+           from.  This is an O(1) operation, so we minimize
+           the lock time */
+        opal_mutex_lock(&mca_oob_tcp_component.tcp_pending_connections_lock);
+        opal_list_join(&mca_oob_tcp_component.tcp_copy_out_connections,
+                       opal_list_get_end(&mca_oob_tcp_component.tcp_copy_out_connections),
+                       &mca_oob_tcp_component.tcp_pending_connections);
+        opal_mutex_unlock(&mca_oob_tcp_component.tcp_pending_connections_lock);
+
+        /* process all the connections */
+        while (NULL != (item = (mca_oob_tcp_pending_connection_t*)
+                        opal_list_remove_first(&mca_oob_tcp_component.
+                                               tcp_copy_out_connections))) {
+
+            /* setup socket options */
+            mca_oob_tcp_set_socket_options(item->fd);
+
+            /* log the accept
+               NOTE(review): sin_port is printed as stored by accept(),
+               i.e. without ntohs() - confirm that is intended */
+            if(mca_oob_tcp_component.tcp_debug) {
+                opal_output(0, "[%lu,%lu,%lu] mca_oob_tcp_listen_progress: %s:%d\n",
+                            ORTE_NAME_ARGS(orte_process_info.my_name),
+                            inet_ntoa(item->addr.sin_addr),
+                            item->addr.sin_port);
+            }
+
+            /* wait for receipt of peers process identifier to
+               complete this connection */
+            event = OBJ_NEW(mca_oob_tcp_event_t);
+            opal_event_set(&event->event, item->fd, OPAL_EV_READ, mca_oob_tcp_recv_handler, event);
+            opal_event_add(&event->event, 0);
+            /* the descriptor now lives in the event; the list item itself
+               goes back to the free list for the listen thread to reuse */
+            OPAL_FREE_LIST_RETURN(&mca_oob_tcp_component.tcp_pending_connections_fl,
+                                  (opal_free_list_item_t *) item);
+
+            count++;
+        }
+
+        mca_oob_tcp_component.tcp_last_copy_time = now;
+    }
+
+    /* number of connections registered with the event library by this call */
+    return count;
+}
+
+
+/*
+ * Used by mca_oob_tcp_component_init() when tcp_listen_type is
+ * OOB_TCP_LISTEN_THREAD: creates the listen socket, binds it to
+ * INADDR_ANY on a system-assigned port, records that port in
+ * tcp_listen_port, makes the socket non-blocking and starts
+ * mca_oob_tcp_listen_thread.  Returns ORTE_ERROR if any socket call
+ * fails, otherwise the result of opal_thread_start().
+ *
+ * NOTE(review): the error paths return without closing tcp_listen_sd.
+ * NOTE(review): the thread is started here, but component_init only
+ * calls opal_free_list_init() on tcp_pending_connections_fl after this
+ * function returns, while the thread uses that free list - confirm the
+ * thread cannot reach OPAL_FREE_LIST_WAIT before the init.
+ */
+static int mca_oob_tcp_create_listen_thread(void)
+{
+    struct sockaddr_in inaddr;
+    opal_socklen_t addrlen;
+    int flags;
+
+    /* create a listen socket for incoming connections */
+    mca_oob_tcp_component.tcp_listen_sd = socket(AF_INET, SOCK_STREAM, 0);
+    if(mca_oob_tcp_component.tcp_listen_sd < 0) {
+        opal_output(0,"mca_oob_tcp_component_init: socket() failed with errno=%d", opal_socket_errno);
+        return ORTE_ERROR;
+    }
+
+    /* setup socket options */
+    mca_oob_tcp_set_socket_options(mca_oob_tcp_component.tcp_listen_sd);
+
+    /* bind address */
+    memset(&inaddr, 0, sizeof(inaddr));
+    inaddr.sin_family = AF_INET;
+    inaddr.sin_addr.s_addr = INADDR_ANY;
+    inaddr.sin_port = 0;
+
+    if(bind(mca_oob_tcp_component.tcp_listen_sd, (struct sockaddr*)&inaddr, sizeof(inaddr)) < 0) {
+        opal_output(0,"mca_oob_tcp_create_listen: bind() failed with errno=%d", opal_socket_errno);
+        return ORTE_ERROR;
+    }
+
+    /* resolve system assigned port */
+    addrlen = sizeof(struct sockaddr_in);
+    if(getsockname(mca_oob_tcp_component.tcp_listen_sd, (struct sockaddr*)&inaddr, &addrlen) < 0) {
+        opal_output(0, "mca_oob_tcp_create_listen: getsockname() failed with errno=%d", opal_socket_errno);
+        return ORTE_ERROR;
+    }
+    mca_oob_tcp_component.tcp_listen_port = inaddr.sin_port;
+
+    /* setup listen backlog to maximum allowed by kernel */
+    if(listen(mca_oob_tcp_component.tcp_listen_sd, SOMAXCONN) < 0) {
+        opal_output(0, "mca_oob_tcp_component_init: listen() failed with errno=%d", opal_socket_errno);
+        return ORTE_ERROR;
+    }
+
+    /* set socket up to be non-blocking, otherwise accept could block */
+    if((flags = fcntl(mca_oob_tcp_component.tcp_listen_sd, F_GETFL, 0)) < 0) {
+        opal_output(0, "mca_oob_tcp_component_init: fcntl(F_GETFL) failed with errno=%d", opal_socket_errno);
+        return ORTE_ERROR;
+    } else {
+        flags |= O_NONBLOCK;
+        if(fcntl(mca_oob_tcp_component.tcp_listen_sd, F_SETFL, flags) < 0) {
+            opal_output(0, "mca_oob_tcp_component_init: fcntl(F_SETFL) failed with errno=%d", opal_socket_errno);
+            return ORTE_ERROR;
+        }
+    }
+
+    /* start the listen thread */
+    mca_oob_tcp_component.tcp_listen_thread.t_run = mca_oob_tcp_listen_thread;
+    mca_oob_tcp_component.tcp_listen_thread.t_arg = NULL;
+
+    return opal_thread_start(&mca_oob_tcp_component.tcp_listen_thread);
+}
+
+
 /*
  * Handle probe
  */
@@ -537,9 +804,23 @@ mca_oob_t* mca_oob_tcp_component_init(int* priority)
     memset(&mca_oob_tcp_component.tcp_send_event, 0, sizeof(opal_event_t));
 
     /* create a listen socket */
-    if(mca_oob_tcp_create_listen() != ORTE_SUCCESS) {
-        opal_output(0, "mca_oob_tcp_init: unable to create listen socket\n");
-        return NULL;
+    if (OOB_TCP_EVENT == mca_oob_tcp_component.tcp_listen_type) {
+        if(mca_oob_tcp_create_listen() != ORTE_SUCCESS) {
+            opal_output(0, "mca_oob_tcp_init: unable to create listen socket");
+            return NULL;
+        }
+    } else if (OOB_TCP_LISTEN_THREAD == mca_oob_tcp_component.tcp_listen_type) {
+        if (mca_oob_tcp_create_listen_thread() != ORTE_SUCCESS) {
+            opal_output(0, "mca_oob_tcp_init: unable to create listen thread");
+            return NULL;
+        }
+        opal_free_list_init(&mca_oob_tcp_component.tcp_pending_connections_fl,
+                            sizeof(mca_oob_tcp_pending_connection_t),
OBJ_CLASS(mca_oob_tcp_pending_connection_t), + 16, /* initial number */ + -1, /* maximum number */ + 16); /* increment to grow by */ + opal_progress_register(mca_oob_tcp_listen_progress); } return &mca_oob_tcp; } @@ -932,8 +1213,16 @@ int mca_oob_tcp_fini(void) /* close listen socket */ if (mca_oob_tcp_component.tcp_listen_sd >= 0) { - opal_event_del(&mca_oob_tcp_component.tcp_recv_event); - CLOSE_THE_SOCKET(mca_oob_tcp_component.tcp_listen_sd); + if (OOB_TCP_EVENT == mca_oob_tcp_component.tcp_listen_type) { + opal_event_del(&mca_oob_tcp_component.tcp_recv_event); + close(mca_oob_tcp_component.tcp_listen_sd); + } else if (OOB_TCP_LISTEN_THREAD == mca_oob_tcp_component.tcp_listen_type) { + void *data; + mca_oob_tcp_component.tcp_shutdown = true; + close(mca_oob_tcp_component.tcp_listen_sd); + opal_thread_join(&mca_oob_tcp_component.tcp_listen_thread, &data); + opal_progress_unregister(mca_oob_tcp_listen_progress); + } mca_oob_tcp_component.tcp_listen_sd = -1; } diff --git a/orte/mca/oob/tcp/oob_tcp.h b/orte/mca/oob/tcp/oob_tcp.h index c570e4bc9a..30e3fdfabf 100644 --- a/orte/mca/oob/tcp/oob_tcp.h +++ b/orte/mca/oob/tcp/oob_tcp.h @@ -34,6 +34,7 @@ #include "opal/threads/condition.h" #include "orte/mca/oob/tcp/oob_tcp_peer.h" #include "orte/mca/oob/tcp/oob_tcp_msg.h" +#include "opal/mca/timer/base/base.h" #if defined(c_plusplus) || defined(__cplusplus) @@ -223,11 +224,6 @@ void mca_oob_tcp_registry_callback( void mca_oob_tcp_set_socket_options(int sd); -typedef enum { - OOB_TCP_EVENT, - OOB_TCP_LISTEN_THREAD -} mca_oob_tcp_listen_type_t; - /** * OOB TCP Component */ @@ -258,6 +254,19 @@ struct mca_oob_tcp_component_t { opal_condition_t tcp_match_cond; /**< condition variable used in finalize */ int tcp_match_count; /**< number of matched recvs in progress */ int tcp_debug; /**< debug level */ + + bool tcp_shutdown; + enum { OOB_TCP_EVENT, OOB_TCP_LISTEN_THREAD } tcp_listen_type; + opal_thread_t tcp_listen_thread; + opal_free_list_t tcp_pending_connections_fl; + 
opal_list_t tcp_pending_connections; + opal_list_t tcp_copy_out_connections; + opal_list_t tcp_copy_in_connections; + opal_mutex_t tcp_pending_connections_lock; + opal_timer_t tcp_last_copy_time; + opal_timer_t tcp_copy_delta; + int tcp_copy_max_size; + int tcp_copy_spin_count; }; /** @@ -273,6 +282,14 @@ ORTE_MODULE_DECLSPEC extern mca_oob_tcp_component_t mca_oob_tcp_component; #define CLOSE_THE_SOCKET(socket) close(socket) #endif /* defined(__WINDOWS__) */ +struct mca_oob_tcp_pending_connection_t { + opal_free_list_item_t super; + int fd; + struct sockaddr_in addr; + }; + typedef struct mca_oob_tcp_pending_connection_t mca_oob_tcp_pending_connection_t; + OBJ_CLASS_DECLARATION(mca_oob_tcp_pending_connection_t); + #if defined(c_plusplus) || defined(__cplusplus) } #endif diff --git a/orte/mca/oob/tcp/oob_tcp_peer.c b/orte/mca/oob/tcp/oob_tcp_peer.c index 47f46a510e..f60ec4cae3 100644 --- a/orte/mca/oob/tcp/oob_tcp_peer.c +++ b/orte/mca/oob/tcp/oob_tcp_peer.c @@ -379,7 +379,7 @@ static int mca_oob_tcp_peer_start_connect(mca_oob_tcp_peer_t* peer) static void mca_oob_tcp_peer_complete_connect(mca_oob_tcp_peer_t* peer) { int so_error = 0; - ompi_socklen_t so_length = sizeof(so_error); + opal_socklen_t so_length = sizeof(so_error); /* unregister from receiving event notifications */ opal_event_del(&peer->peer_send_event); @@ -467,7 +467,7 @@ void mca_oob_tcp_peer_close(mca_oob_tcp_peer_t* peer) * get stuck in the orte_wait_kill when receiving messages in the * tcp OOB. 
*/ OPAL_THREAD_UNLOCK(&peer->peer_lock); - orte_errmgr.abort(); + orte_errmgr.error_detected(1, "OOB: Connection to HNP lost", NULL); } } @@ -787,8 +787,8 @@ static void mca_oob_tcp_peer_dump(mca_oob_tcp_peer_t* peer, const char* msg) char buff[255]; int sndbuf,rcvbuf,nodelay,flags; struct sockaddr_in inaddr; - ompi_socklen_t optlen; - ompi_socklen_t addrlen = sizeof(struct sockaddr_in); + opal_socklen_t optlen; + opal_socklen_t addrlen = sizeof(struct sockaddr_in); getsockname(peer->peer_sd, (struct sockaddr*)&inaddr, &addrlen); sprintf(src, "%s", inet_ntoa(inaddr.sin_addr)); diff --git a/orte/mca/pls/Makefile.am b/orte/mca/pls/Makefile.am index c997715aab..65e0b6c3c2 100644 --- a/orte/mca/pls/Makefile.am +++ b/orte/mca/pls/Makefile.am @@ -25,7 +25,7 @@ nobase_orte_HEADERS = dist_pkgdata_DATA = # local files -headers = pls.h +headers = pls.h pls_types.h libmca_pls_la_SOURCES += $(headers) # Conditionally install the header files diff --git a/orte/mca/pls/base/Makefile.am b/orte/mca/pls/base/Makefile.am index a678534af6..7df8bf1463 100644 --- a/orte/mca/pls/base/Makefile.am +++ b/orte/mca/pls/base/Makefile.am @@ -19,12 +19,14 @@ dist_pkgdata_DATA += base/help-pls-base.txt headers += \ + base/pls_private.h \ base/base.h libmca_pls_la_SOURCES += \ base/pls_base_close.c \ - base/pls_base_context.c \ + base/pls_base_general_support_fns.c \ base/pls_base_open.c \ + base/pls_base_receive.c \ base/pls_base_select.c \ - base/pls_base_state.c \ - base/pls_base_proxy.c + base/pls_base_dmn_registry_fns.c \ + base/pls_base_orted_cmds.c diff --git a/orte/mca/pls/base/base.h b/orte/mca/pls/base/base.h index 39864bd338..2ed28a9851 100644 --- a/orte/mca/pls/base/base.h +++ b/orte/mca/pls/base/base.h @@ -25,9 +25,11 @@ * includes */ #include "orte_config.h" + #include "opal/mca/mca.h" +#include "opal/class/opal_list.h" + #include "orte/mca/pls/pls.h" -#include "orte/mca/ras/base/ras_base_node.h" #if defined(c_plusplus) || defined(__cplusplus) @@ -41,13 +43,9 @@ extern "C" { /** 
Verbose/debug output stream */ int pls_output; /** List of opened components */ - opal_list_t pls_opened; - /** Whether the list of opened components is valid */ - bool pls_opened_valid; - /** Sorted list of available components (highest priority first) */ - opal_list_t pls_available; - /** Whether the list of available components is valid */ - bool pls_available_valid; + opal_list_t available_components; + /** selected component */ + orte_pls_base_component_t selected_component; } orte_pls_base_t; /** @@ -55,24 +53,6 @@ extern "C" { */ ORTE_DECLSPEC extern orte_pls_base_t orte_pls_base; - /** - * pls component/module/priority tuple - */ - struct orte_pls_base_cmp_t { - /** Base object */ - opal_list_item_t super; - /** pls component */ - orte_pls_base_component_t *component; - /** pls module */ - orte_pls_base_module_t* module; - /** This component's priority */ - int priority; - }; - /** Convenience typedef */ - typedef struct orte_pls_base_cmp_t orte_pls_base_cmp_t; - /** Class declaration */ - ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_pls_base_cmp_t); - /* * Global functions for MCA overall collective open and close */ @@ -84,69 +64,13 @@ extern "C" { /** * Select a pls module */ - ORTE_DECLSPEC orte_pls_base_module_t *orte_pls_base_select(char *preferred); + ORTE_DECLSPEC int orte_pls_base_select(void); + /** * Close the pls framework */ ORTE_DECLSPEC int orte_pls_base_finalize(void); ORTE_DECLSPEC int orte_pls_base_close(void); - /** - * Utility routine to get/set procesS pid - */ - ORTE_DECLSPEC int orte_pls_base_set_proc_pid(const orte_process_name_t*, pid_t); - ORTE_DECLSPEC int orte_pls_base_get_proc_pid(const orte_process_name_t*, pid_t*); - /** - * Utility routine to retreive all process pids w/in a specified job. 
- */ - ORTE_DECLSPEC int orte_pls_base_get_proc_pids(orte_jobid_t jobid, pid_t** pids, orte_std_cntr_t* num_pids); - /** - * Utility routine to get/set daemon pid - */ - ORTE_DECLSPEC int orte_pls_base_set_node_pid(orte_cellid_t cellid, char* node_name, orte_jobid_t jobid, pid_t pid); - ORTE_DECLSPEC int orte_pls_base_get_node_pids(orte_jobid_t jobid, pid_t** pids, orte_std_cntr_t* num_pids); - - /** - * Utility routine to set progress engine schedule - */ - ORTE_DECLSPEC int orte_pls_base_set_progress_sched(int sched); - - - /** - * Utilities for pls components that use proxy daemons - */ - int orte_pls_base_proxy_set_node_name(orte_ras_node_t* node, - orte_jobid_t jobid, - orte_process_name_t* name); - int orte_pls_base_proxy_mca_argv(int *argc, char ***argv); - int orte_pls_base_proxy_terminate_job(orte_jobid_t jobid); - int orte_pls_base_proxy_terminate_proc(const orte_process_name_t *proc); - int orte_pls_base_proxy_signal_job(orte_jobid_t jobid, int32_t signal); - int orte_pls_base_proxy_signal_proc(const orte_process_name_t *proc, int32_t signal); - - /** - * Check that the cwd in an app context exists and is accessible. - * If the user specified the cwd and we can chdir to it, print an - * error and fail. If the user didn't specify it (i.e., it's a - * default), then see if chdir($HOME) would succeed. - * - * If either chdir() would succeed and do_chdir is true, then - * actually do the chdir(). - * - * If we fall back to the chdir($HOME), set context->cwd to be a - * string pointing to the home directory name (owned by the - * context; safe to free at destruction). - */ - ORTE_DECLSPEC int orte_pls_base_check_context_cwd(orte_app_context_t *context, - bool do_chdir); - - /** - * Check that the app exists and is executable. If it is not, - * print and error and fail. 
If it is, and if the app was a naked - * executable (i.e., no relative or absolute path), replace the - * app with the string containing the absolute pathname to the - * exectuable (owned by the context; safe to free at destruction). - */ - ORTE_DECLSPEC int orte_pls_base_check_context_app(orte_app_context_t *context); #if defined(c_plusplus) || defined(__cplusplus) } diff --git a/orte/mca/pls/base/pls_base_close.c b/orte/mca/pls/base/pls_base_close.c index 0aa1f36c4d..ec49d27033 100644 --- a/orte/mca/pls/base/pls_base_close.c +++ b/orte/mca/pls/base/pls_base_close.c @@ -29,35 +29,20 @@ int orte_pls_base_finalize(void) { - /* Finalize all available modules */ - if (orte_pls_base.pls_available_valid) { - opal_list_item_t* item; - while (NULL != - (item = opal_list_remove_first(&orte_pls_base.pls_available))) { - orte_pls_base_cmp_t* cmp = (orte_pls_base_cmp_t*) item; - opal_output(orte_pls_base.pls_output, - "orte:base:close: finalizing module %s", - cmp->component->pls_version.mca_component_name); - if (NULL != cmp->module->finalize) { - cmp->module->finalize(); - } - OBJ_RELEASE(cmp); - } - } - orte_pls_base.pls_available_valid = false; + /* Finalize the selected module */ + orte_pls.finalize(); + return ORTE_SUCCESS; } int orte_pls_base_close(void) { - /* Close all remaining open components */ - if (orte_pls_base.pls_opened_valid) { - orte_pls_base.pls_opened_valid = false; - mca_base_components_close(orte_pls_base.pls_output, - &orte_pls_base.pls_opened, NULL); - OBJ_DESTRUCT(&orte_pls_base.pls_opened); - } + /* Close all open components */ + mca_base_components_close(orte_pls_base.pls_output, + &orte_pls_base.available_components, NULL); + OBJ_DESTRUCT(&orte_pls_base.available_components); + return ORTE_SUCCESS; } diff --git a/orte/mca/pls/base/pls_base_dmn_registry_fns.c b/orte/mca/pls/base/pls_base_dmn_registry_fns.c new file mode 100644 index 0000000000..14dd6b746a --- /dev/null +++ b/orte/mca/pls/base/pls_base_dmn_registry_fns.c @@ -0,0 +1,217 @@ +/* + * 
Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + */ + +#include "orte_config.h" +#include "orte/orte_constants.h" + +#include + +#include "opal/util/output.h" +#include "opal/util/argv.h" + +#include "orte/mca/ns/ns.h" +#include "orte/mca/gpr/gpr.h" +#include "orte/mca/errmgr/errmgr.h" + +#include "orte/mca/pls/base/pls_private.h" + +static void orte_pls_daemon_info_construct(orte_pls_daemon_info_t* ptr) +{ + ptr->cell = ORTE_CELLID_INVALID; + ptr->nodename = NULL; + ptr->name = NULL; + ptr->active_job = ORTE_JOBID_INVALID; +} + +/* destructor - used to free any resources held by instance */ +static void orte_pls_daemon_info_destructor(orte_pls_daemon_info_t* ptr) +{ + if (NULL != ptr->nodename) free(ptr->nodename); + if (NULL != ptr->name) free(ptr->name); +} +OBJ_CLASS_INSTANCE(orte_pls_daemon_info_t, /* type name */ + opal_list_item_t, /* parent "class" name */ + orte_pls_daemon_info_construct, /* constructor */ + orte_pls_daemon_info_destructor); /* destructor */ + +/* + * Store the active daemons for a job + */ +int orte_pls_base_store_active_daemons(opal_list_t *daemons, orte_jobid_t job) +{ + orte_pls_daemon_info_t *dmn; + opal_list_item_t *item; + orte_gpr_value_t **values; + char *jobid_string, *key; + int rc, i, num_daemons; + + /* determine the number of daemons */ + num_daemons = opal_list_get_size(daemons); + + /* since each daemon gets recorded in a separate node's container, + * we need to allocate space for num_daemons 
value objects + */ + values = (orte_gpr_value_t**)malloc(num_daemons * sizeof(orte_gpr_value_t*)); + if (NULL == values) { + ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); + return ORTE_ERR_OUT_OF_RESOURCE; + } + memset(values, 0, num_daemons*sizeof(orte_gpr_value_t*)); /* NULL the array */ + + /* setup the key */ + if (ORTE_SUCCESS != (rc = orte_ns.convert_jobid_to_string(&jobid_string, job))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(values[0]); + return rc; + } + asprintf(&key, "%s-%s", ORTE_NODE_BOOTPROXY_KEY, jobid_string); + free(jobid_string); + + /* loop through the values and the list and create all the value objects */ + item = opal_list_get_first(daemons); + for (i=0; i < num_daemons; i++) { + dmn = (orte_pls_daemon_info_t*)item; + + if (ORTE_SUCCESS != (rc = orte_gpr.create_value(&values[i], + ORTE_GPR_OVERWRITE, + ORTE_NODE_SEGMENT, + 1, 0))) { + ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); + goto CLEANUP; + } + + if (ORTE_SUCCESS != (rc = orte_schema.get_node_tokens(&(values[i]->tokens), &(values[i]->num_tokens), + dmn->cell, dmn->nodename))) { + ORTE_ERROR_LOG(rc); + goto CLEANUP; + } + + if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(values[i]->keyvals[0]), key, ORTE_NAME, dmn->name))) { + ORTE_ERROR_LOG(rc); + goto CLEANUP; + } + item = opal_list_get_next(item); + } + + rc = orte_gpr.put(num_daemons, values); + if (ORTE_SUCCESS != rc) { + ORTE_ERROR_LOG(rc); + } + +CLEANUP: + for (i=0; i < num_daemons; i++) { + if (NULL != values[i]) OBJ_RELEASE(values[i]); + } + if (NULL != values) free(values); + free(key); + + return rc; +} + +/* + * Retrieve a list of the active daemons for a job + */ +int orte_pls_base_get_active_daemons(opal_list_t *daemons, orte_jobid_t job) +{ + orte_gpr_value_t **values; + orte_gpr_keyval_t *kv; + orte_std_cntr_t cnt, i, j; + char* jobid_string; + char *keys[] = { + NULL, /* placeholder */ + ORTE_NODE_NAME_KEY, + ORTE_CELLID_KEY, + NULL + }; + orte_cellid_t *cell; + orte_pls_daemon_info_t *dmn; + int rc; + + /* setup the key */ + 
if (ORTE_SUCCESS != (rc = orte_ns.convert_jobid_to_string(&jobid_string, job))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(values[0]); + return rc; + } + asprintf(&keys[0], "%s-%s", ORTE_NODE_BOOTPROXY_KEY, jobid_string); + free(jobid_string); + + /* query the daemon info */ + if (ORTE_SUCCESS != (rc = orte_gpr.get(ORTE_GPR_KEYS_OR|ORTE_GPR_TOKENS_OR, + ORTE_NODE_SEGMENT, + NULL, /* all containers */ + keys, + &cnt, &values))) { + ORTE_ERROR_LOG(rc); + free(keys[0]); + return rc; + } + + /* loop through the answers and construct the list */ + for (i=0; i < cnt; i++) { + /* each container should have only one set of values */ + dmn = OBJ_NEW(orte_pls_daemon_info_t); + if (NULL == dmn) { + ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); + goto CLEANUP; + } + for (j=0; j < values[i]->cnt; j++) { + kv = values[i]->keyvals[j]; + if (0 == strcmp(kv->key, keys[0])) { + if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&(dmn->name), kv->value->data, ORTE_NAME))) { + ORTE_ERROR_LOG(rc); + goto CLEANUP; + } + continue; + } + if (0 == strcmp(kv->key, ORTE_NODE_NAME_KEY)) { + /* use the dss.copy function here to protect us against zero-length strings */ + if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&(dmn->nodename), kv->value->data, ORTE_STRING))) { + ORTE_ERROR_LOG(rc); + goto CLEANUP; + } + continue; + } + if (0 == strcmp(kv->key, ORTE_CELLID_KEY)) { + if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&cell, kv->value, ORTE_CELLID))) { + ORTE_ERROR_LOG(rc); + goto CLEANUP; + } + dmn->cell = *cell; + continue; + } + } + /* add this daemon to the list */ + opal_list_append(daemons, &dmn->super); + OBJ_RELEASE(values[i]); + } + +CLEANUP: + for (i=0; i < cnt; i++) { + if (NULL != values[i]) OBJ_RELEASE(values[i]); + } + if (NULL != values) free(values); + free(keys[0]); + + return rc; +} + +/* + * Retrieve the active daemon(s) for a specific node + */ diff --git a/orte/mca/pls/base/pls_base_general_support_fns.c b/orte/mca/pls/base/pls_base_general_support_fns.c new file mode 100644 index 
0000000000..b8b439d008 --- /dev/null +++ b/orte/mca/pls/base/pls_base_general_support_fns.c @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + */ + +#include "orte_config.h" +#include "orte/orte_constants.h" + +#include "opal/util/argv.h" +#include "opal/mca/base/mca_base_param.h" + +#include "orte/mca/pls/base/pls_private.h" + + +static int lookup_set(char *a, char *b, char *c, int default_val, + char *token, int *argc, char ***argv) +{ + int id, rc; + + id = mca_base_param_find(a, b, c); + if (id < 0) { + id = mca_base_param_register_int(a, b, c, NULL, default_val); + } + mca_base_param_lookup_int(id, &rc); + if (rc) { + opal_argv_append(argc, argv, token); + } + + return ORTE_SUCCESS; +} + + +int orte_pls_base_mca_argv(int *argc, char ***argv) +{ + lookup_set("orted", "spin", NULL, 0, "--spin", argc, argv); + lookup_set("orte", "debug", NULL, 0, "--debug", argc, argv); + lookup_set("orte", "debug", "daemons", 0, "--debug-daemons", argc, argv); + lookup_set("orte", "debug", "daemons_file", 0, "--debug-daemons-file", argc, argv); + + return ORTE_SUCCESS; +} + diff --git a/orte/mca/pls/base/pls_base_open.c b/orte/mca/pls/base/pls_base_open.c index 59ab7b4034..e5fa0667a5 100644 --- a/orte/mca/pls/base/pls_base_open.c +++ b/orte/mca/pls/base/pls_base_open.c @@ -22,6 +22,8 @@ #include "opal/util/output.h" #include "opal/mca/mca.h" #include "opal/mca/base/base.h" + +#include "orte/mca/pls/pls.h" #include 
"orte/mca/pls/base/base.h" @@ -39,6 +41,11 @@ */ orte_pls_base_t orte_pls_base; +/* + * The default module + */ +orte_pls_base_module_t orte_pls; + /** * Function for finding and opening either all MCA modules, or the one @@ -59,18 +66,14 @@ int orte_pls_base_open(void) orte_pls_base.pls_output = -1; } - orte_pls_base.pls_opened_valid = false; - orte_pls_base.pls_available_valid = false; - /* Open up all the components that we can find */ if (ORTE_SUCCESS != mca_base_components_open("pls", orte_pls_base.pls_output, mca_pls_base_static_components, - &orte_pls_base.pls_opened, true)) { + &orte_pls_base.available_components, true)) { return ORTE_ERROR; } - orte_pls_base.pls_opened_valid = true; /* All done */ diff --git a/orte/mca/pls/base/pls_base_orted_cmds.c b/orte/mca/pls/base/pls_base_orted_cmds.c new file mode 100644 index 0000000000..86cfff8de2 --- /dev/null +++ b/orte/mca/pls/base/pls_base_orted_cmds.c @@ -0,0 +1,225 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + */ + +#include "orte_config.h" +#include "orte/orte_constants.h" + +#include "opal/util/output.h" +#include "opal/util/argv.h" +#include "opal/mca/base/mca_base_param.h" + +#include "orte/dss/dss.h" +#include "orte/mca/ns/ns_types.h" +#include "orte/mca/odls/odls_types.h" +#include "orte/mca/rml/rml.h" +#include "orte/mca/errmgr/errmgr.h" + +#include "orte/mca/pls/base/pls_private.h" + + +int orte_pls_base_orted_exit(opal_list_t *daemons) +{ + int rc; + orte_buffer_t cmd, answer; + orte_daemon_cmd_flag_t command=ORTE_DAEMON_EXIT_CMD; + opal_list_item_t *item; + orte_pls_daemon_info_t *dmn; + + OBJ_CONSTRUCT(&cmd, orte_buffer_t); + + /* pack the command */ + if (ORTE_SUCCESS != (rc = orte_dss.pack(&cmd, &command, 1, ORTE_DAEMON_CMD))) { + ORTE_ERROR_LOG(rc); + goto CLEANUP; + } + + /* send the commands as fast as we can */ + for (item = opal_list_get_first(daemons); + item != opal_list_get_end(daemons); + item = opal_list_get_next(item)) { + dmn = (orte_pls_daemon_info_t*)item; + + if (0 > orte_rml.send_buffer(dmn->name, &cmd, ORTE_RML_TAG_PLS_ORTED, 0)) { + ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); + OBJ_DESTRUCT(&cmd); + return ORTE_ERR_COMM_FAILURE; + } + + OBJ_CONSTRUCT(&answer, orte_buffer_t); + if (0 > orte_rml.recv_buffer(dmn->name, &answer, ORTE_RML_TAG_PLS_ORTED)) { + ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); + } + OBJ_DESTRUCT(&answer); + } + +CLEANUP: + OBJ_DESTRUCT(&cmd); + + /* we're done! 
*/ + return ORTE_SUCCESS; +} + + +int orte_pls_base_orted_kill_local_procs(opal_list_t *daemons, orte_jobid_t job) +{ + int rc; + orte_buffer_t cmd, answer; + orte_daemon_cmd_flag_t command=ORTE_DAEMON_KILL_LOCAL_PROCS; + opal_list_item_t *item; + orte_pls_daemon_info_t *dmn; + + OBJ_CONSTRUCT(&cmd, orte_buffer_t); + + /* pack the command */ + if (ORTE_SUCCESS != (rc = orte_dss.pack(&cmd, &command, 1, ORTE_DAEMON_CMD))) { + ORTE_ERROR_LOG(rc); + goto CLEANUP; + } + + /* pack the jobid */ + if (ORTE_SUCCESS != (rc = orte_dss.pack(&cmd, &job, 1, ORTE_JOBID))) { + ORTE_ERROR_LOG(rc); + goto CLEANUP; + } + + /* send the commands as fast as we can */ + for (item = opal_list_get_first(daemons); + item != opal_list_get_end(daemons); + item = opal_list_get_next(item)) { + dmn = (orte_pls_daemon_info_t*)item; + + if (0 > orte_rml.send_buffer(dmn->name, &cmd, ORTE_RML_TAG_PLS_ORTED, 0)) { + ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); + OBJ_DESTRUCT(&cmd); + return rc; + } + + OBJ_CONSTRUCT(&answer, orte_buffer_t); + if (0 > orte_rml.recv_buffer(dmn->name, &answer, ORTE_RML_TAG_PLS_ORTED)) { + ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); + } + OBJ_DESTRUCT(&answer); + } + +CLEANUP: + OBJ_DESTRUCT(&cmd); + + /* we're done! 
*/ + return ORTE_SUCCESS; +} + + + +int orte_pls_base_orted_signal_local_procs(opal_list_t *daemons, int32_t signal) +{ + int rc; + orte_buffer_t cmd, answer; + orte_daemon_cmd_flag_t command=ORTE_DAEMON_SIGNAL_LOCAL_PROCS; + opal_list_item_t *item; + orte_pls_daemon_info_t *dmn; + + OBJ_CONSTRUCT(&cmd, orte_buffer_t); + + /* pack the command */ + if (ORTE_SUCCESS != (rc = orte_dss.pack(&cmd, &command, 1, ORTE_DAEMON_CMD))) { + ORTE_ERROR_LOG(rc); + goto CLEANUP; + } + + /* pack the jobid */ + if (ORTE_SUCCESS != (rc = orte_dss.pack(&cmd, &signal, 1, ORTE_INT32))) { + ORTE_ERROR_LOG(rc); + goto CLEANUP; + } + + /* send the commands as fast as we can */ + for (item = opal_list_get_first(daemons); + item != opal_list_get_end(daemons); + item = opal_list_get_next(item)) { + dmn = (orte_pls_daemon_info_t*)item; + + if (0 > orte_rml.send_buffer(dmn->name, &cmd, ORTE_RML_TAG_PLS_ORTED, 0)) { + ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); + OBJ_DESTRUCT(&cmd); + return rc; + } + + OBJ_CONSTRUCT(&answer, orte_buffer_t); + if (0 > orte_rml.recv_buffer(dmn->name, &answer, ORTE_RML_TAG_PLS_ORTED)) { + ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); + } + OBJ_DESTRUCT(&answer); + } + +CLEANUP: + OBJ_DESTRUCT(&cmd); + + /* we're done! 
*/ + return ORTE_SUCCESS; +} + + +int orte_pls_base_orted_add_local_procs(opal_list_t *daemons, orte_gpr_notify_data_t *ndat) +{ + int rc; + orte_buffer_t cmd, answer; + orte_daemon_cmd_flag_t command=ORTE_DAEMON_ADD_LOCAL_PROCS; + opal_list_item_t *item; + orte_pls_daemon_info_t *dmn; + + OBJ_CONSTRUCT(&cmd, orte_buffer_t); + + /* pack the command */ + if (ORTE_SUCCESS != (rc = orte_dss.pack(&cmd, &command, 1, ORTE_DAEMON_CMD))) { + ORTE_ERROR_LOG(rc); + goto CLEANUP; + } + + /* pack the jobid */ + if (ORTE_SUCCESS != (rc = orte_dss.pack(&cmd, &ndat, 1, ORTE_GPR_NOTIFY_DATA))) { + ORTE_ERROR_LOG(rc); + goto CLEANUP; + } + + /* send the commands as fast as we can */ + for (item = opal_list_get_first(daemons); + item != opal_list_get_end(daemons); + item = opal_list_get_next(item)) { + dmn = (orte_pls_daemon_info_t*)item; + + if (0 > orte_rml.send_buffer(dmn->name, &cmd, ORTE_RML_TAG_PLS_ORTED, 0)) { + ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); + OBJ_DESTRUCT(&cmd); + return rc; + } + + OBJ_CONSTRUCT(&answer, orte_buffer_t); + if (0 > orte_rml.recv_buffer(dmn->name, &answer, ORTE_RML_TAG_PLS_ORTED)) { + ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); + } + OBJ_DESTRUCT(&answer); + } + +CLEANUP: + OBJ_DESTRUCT(&cmd); + + /* we're done! */ + return ORTE_SUCCESS; +} + diff --git a/orte/mca/pls/base/pls_base_proxy.c b/orte/mca/pls/base/pls_base_proxy.c deleted file mode 100644 index 90e43e0d03..0000000000 --- a/orte/mca/pls/base/pls_base_proxy.c +++ /dev/null @@ -1,418 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - */ - -#include "orte_config.h" - -#include "opal/util/output.h" -#include "opal/util/argv.h" -#include "opal/mca/base/mca_base_param.h" -#include "orte/mca/pls/base/base.h" -#include "orte/orte_constants.h" -#include "orte/mca/ns/ns.h" -#include "orte/mca/pls/pls.h" -#include "orte/mca/rml/rml.h" -#include "orte/mca/gpr/gpr.h" -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/ras/base/ras_base_node.h" -#include "orte/mca/rmgr/base/base.h" - - -int -orte_pls_base_proxy_set_node_name(orte_ras_node_t* node, - orte_jobid_t jobid, - orte_process_name_t* name) -{ - orte_gpr_value_t* values[1]; - char* jobid_string, *key; - int rc; - - if (ORTE_SUCCESS != (rc = orte_gpr.create_value(&values[0], - ORTE_GPR_OVERWRITE, - ORTE_NODE_SEGMENT, - 1, 0))) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - if (ORTE_SUCCESS != (rc = orte_ns.convert_jobid_to_string(&jobid_string, jobid))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(values[0]); - return rc; - } - - if (ORTE_SUCCESS != (rc = orte_schema.get_node_tokens(&(values[0]->tokens), &(values[0]->num_tokens), - node->node_cellid, node->node_name))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(values[0]); - free(jobid_string); - return rc; - } - - asprintf(&key, "%s-%s", ORTE_NODE_BOOTPROXY_KEY, jobid_string); - if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(values[0]->keyvals[0]), key, ORTE_NAME, name))) { - ORTE_ERROR_LOG(rc); - free(jobid_string); - free(key); - OBJ_RELEASE(values[0]); - return rc; - } - - rc = orte_gpr.put(1, values); - if (ORTE_SUCCESS != rc) { - ORTE_ERROR_LOG(rc); - } - - OBJ_RELEASE(values[0]); - free(jobid_string); - free(key); - - return rc; -} - - - -static int lookup_set(char *a, char *b, char *c, int default_val, - char *token, int *argc, char ***argv) -{ - int id, rc; - - id = mca_base_param_find(a, b, c); - if (id < 0) { - id = mca_base_param_register_int(a, b, c, NULL, default_val); 
- } - mca_base_param_lookup_int(id, &rc); - if (rc) { - opal_argv_append(argc, argv, token); - } - - return ORTE_SUCCESS; -} - - -int orte_pls_base_proxy_mca_argv(int *argc, char ***argv) -{ - lookup_set("orte", "debug", NULL, 0, "--debug", argc, argv); - lookup_set("orte", "debug", "daemons", 0, "--debug-daemons", argc, argv); - lookup_set("orte", "debug", "daemons_file", 0, "--debug-daemons-file", argc, argv); - - return ORTE_SUCCESS; -} - - -/** - * Wait for a pending job to complete. - */ -static void orte_pls_rsh_terminate_job_rsp( - int status, - orte_process_name_t* peer, - orte_buffer_t* rsp, - orte_rml_tag_t tag, - void* cbdata) -{ - int rc; - if (ORTE_SUCCESS != (rc = orte_rmgr_base_unpack_rsp(rsp))) { - ORTE_ERROR_LOG(rc); - } -} - - -static void orte_pls_rsh_terminate_job_cb( - int status, - orte_process_name_t* peer, - orte_buffer_t* req, - orte_rml_tag_t tag, - void* cbdata) -{ - /* wait for response */ - int rc; - if (status < 0) { - ORTE_ERROR_LOG(status); - if(NULL != req) - OBJ_RELEASE(req); - return; - } - - if (0 > (rc = orte_rml.recv_buffer_nb(peer, ORTE_RML_TAG_RMGR_CLNT, 0, orte_pls_rsh_terminate_job_rsp, NULL))) { - ORTE_ERROR_LOG(rc); - } - - if(NULL != req) - OBJ_RELEASE(req); -} - - -int -orte_pls_base_proxy_terminate_job(orte_jobid_t jobid) -{ - char *keys[2]; - char *jobid_string; - orte_gpr_value_t** values = NULL; - orte_std_cntr_t i, j, num_values = 0; - orte_process_name_t proc, *pnptr; - int rc; - - if (ORTE_SUCCESS != (rc = orte_ns.convert_jobid_to_string(&jobid_string, jobid))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - asprintf(&keys[0], "%s-%s", ORTE_NODE_BOOTPROXY_KEY, jobid_string); - keys[1] = NULL; - - rc = orte_gpr.get( - ORTE_GPR_KEYS_OR|ORTE_GPR_TOKENS_OR, - ORTE_NODE_SEGMENT, - NULL, - keys, - &num_values, - &values - ); - if (rc != ORTE_SUCCESS) { - free(jobid_string); - return rc; - } - if (0 == num_values) { - rc = ORTE_ERR_NOT_FOUND; - ORTE_ERROR_LOG(rc); - goto cleanup; - } - - for(i=0; icnt; j++) { - 
orte_gpr_keyval_t* keyval = value->keyvals[j]; - orte_buffer_t *cmd = OBJ_NEW(orte_buffer_t); - int ret; - if (cmd == NULL) { - rc = ORTE_ERR_OUT_OF_RESOURCE; - ORTE_ERROR_LOG(rc); - goto cleanup; - } - if (strcmp(keyval->key, keys[0]) != 0) - continue; - - /* construct command */ - ret = orte_rmgr_base_pack_cmd(cmd, ORTE_RMGR_CMD_TERM_JOB, jobid); - if (ORTE_SUCCESS != ret) { - ORTE_ERROR_LOG(ret); - OBJ_RELEASE(cmd); - rc = ret; - continue; - } - - /* get the process name from the returned keyval */ - if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&pnptr, values[i]->keyvals[0]->value, ORTE_NAME))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - rc = ret; - continue; - } - proc = *pnptr; - - /* send a terminate message to the bootproxy on each node */ - if (0 > (ret = orte_rml.send_buffer_nb( - &proc, - cmd, - ORTE_RML_TAG_RMGR_SVC, - 0, - orte_pls_rsh_terminate_job_cb, - NULL))) { - - ORTE_ERROR_LOG(ret); - OBJ_RELEASE(cmd); - rc = ret; - continue; - } - } - } - -cleanup: - - free(jobid_string); - free(keys[0]); - - if (NULL != values) { - for(i=0; i (rc = orte_rml.recv_buffer_nb(peer, ORTE_RML_TAG_RMGR_CLNT, 0, orte_pls_rsh_signal_job_rsp, NULL))) { - ORTE_ERROR_LOG(rc); - } - OBJ_RELEASE(req); -} - - -int -orte_pls_base_proxy_signal_job(orte_jobid_t jobid, int32_t signal) -{ - char *keys[2]; - char *jobid_string; - orte_gpr_value_t** values = NULL; - orte_std_cntr_t i, j, num_values = 0; - orte_process_name_t proc, *pnptr; - int rc; - - if (ORTE_SUCCESS != (rc = orte_ns.convert_jobid_to_string(&jobid_string, jobid))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - asprintf(&keys[0], "%s-%s", ORTE_NODE_BOOTPROXY_KEY, jobid_string); - keys[1] = NULL; - - rc = orte_gpr.get( - ORTE_GPR_KEYS_OR|ORTE_GPR_TOKENS_OR, - ORTE_NODE_SEGMENT, - NULL, - keys, - &num_values, - &values - ); - if (rc != ORTE_SUCCESS) { - free(jobid_string); - return rc; - } - if (0 == num_values) { - rc = ORTE_ERR_NOT_FOUND; - ORTE_ERROR_LOG(rc); - goto cleanup; - } - - for(i=0; icnt; j++) { - 
orte_gpr_keyval_t* keyval = value->keyvals[j]; - orte_buffer_t *cmd = OBJ_NEW(orte_buffer_t); - int ret; - if (cmd == NULL) { - rc = ORTE_ERR_OUT_OF_RESOURCE; - ORTE_ERROR_LOG(rc); - goto cleanup; - } - if (strcmp(keyval->key, keys[0]) != 0) - continue; - - /** construct command */ - ret = orte_rmgr_base_pack_signal_job_cmd(cmd, jobid, signal); - if (ORTE_SUCCESS != ret) { - ORTE_ERROR_LOG(ret); - OBJ_RELEASE(cmd); - rc = ret; - continue; - } - - /** get the process name from the returned keyval */ - if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&pnptr, values[i]->keyvals[0]->value, ORTE_NAME))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - rc = ret; - continue; - } - proc = *pnptr; - - /** send a signal message to the bootproxy on each node */ - if (0 > (ret = orte_rml.send_buffer_nb( - &proc, - cmd, - ORTE_RML_TAG_RMGR_SVC, - 0, - orte_pls_rsh_signal_job_cb, - NULL))) { - - ORTE_ERROR_LOG(ret); - OBJ_RELEASE(cmd); - rc = ret; - continue; - } - } - } - -cleanup: - - free(jobid_string); - free(keys[0]); - - if (NULL != values) { - for(i=0; i orte_rml.send_buffer(sender, &answer, tag, 0)) { + ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); + } + + /* cleanup */ + OBJ_DESTRUCT(&answer); +} + diff --git a/orte/mca/pls/base/pls_base_select.c b/orte/mca/pls/base/pls_base_select.c index 5f19de5b57..beec3718eb 100644 --- a/orte/mca/pls/base/pls_base_select.c +++ b/orte/mca/pls/base/pls_base_select.c @@ -27,122 +27,23 @@ #include "orte/mca/pls/base/base.h" -/* - * Local functions - */ - -static orte_pls_base_module_t *select_preferred(char *name); -static orte_pls_base_module_t *select_any(void); - -static int compare(opal_list_item_t **a, opal_list_item_t **b); -static void cmp_constructor(orte_pls_base_cmp_t *cmp); -static void cmp_destructor(orte_pls_base_cmp_t *cmp); - - -/* - * Global variables - */ -OBJ_CLASS_INSTANCE(orte_pls_base_cmp_t, opal_list_item_t, - cmp_constructor, cmp_destructor); - - - -/* - * Function for selecting one component from all those that are +/** +* 
Function for selecting one component from all those that are * available. */ -orte_pls_base_module_t* orte_pls_base_select(char *preferred) -{ - orte_pls_base_module_t *ret; - /* Construct the empty list */ - - OBJ_CONSTRUCT(&orte_pls_base.pls_available, opal_list_t); - orte_pls_base.pls_available_valid = true; - - /* Now - did we want a specific one? */ - - if (NULL != preferred) { - ret = select_preferred(preferred); - } else { - ret = select_any(); - } - if (NULL == ret) { - opal_show_help("help-pls-base.txt", "no-available-pls", true); - } - return ret; -} - - -static orte_pls_base_module_t *select_preferred(char *name) +int orte_pls_base_select(void) { opal_list_item_t *item; mca_base_component_list_item_t *cli; - orte_pls_base_component_t *component; - orte_pls_base_module_t *module; - orte_pls_base_cmp_t *cmp; - int priority; - - /* Look for a matching selected name */ - - opal_output(orte_pls_base.pls_output, - "orte:base:select: looking for component %s", name); - for (item = opal_list_get_first(&orte_pls_base.pls_opened); - item != opal_list_get_end(&orte_pls_base.pls_opened); - item = opal_list_get_next(item)) { - cli = (mca_base_component_list_item_t *) item; - component = (orte_pls_base_component_t *) cli->cli_component; - - /* If we found it, call the component's init function to see - if we get a module back */ - - if (0 == strcmp(name, - component->pls_version.mca_component_name)) { - opal_output(orte_pls_base.pls_output, - "orte:base:select: found module for compoent %s", name); - module = component->pls_init(&priority); - - /* If we got a non-NULL module back, then the component wants - to be considered for selection */ - - if (NULL != module) { - opal_output(orte_pls_base.pls_output, - "orte:base:open: component %s returns priority %d", - component->pls_version.mca_component_name, - priority); - - cmp = OBJ_NEW(orte_pls_base_cmp_t); - cmp->component = component; - cmp->module = module; - cmp->priority = priority; - - 
opal_list_append(&orte_pls_base.pls_available, &cmp->super); - return module; - } - } - } - - /* Didn't find a matching name */ - - opal_output(orte_pls_base.pls_output, - "orte:base:select: did not find module for compoent %s", name); - return NULL; -} - - -static orte_pls_base_module_t *select_any(void) -{ - opal_list_item_t *item; - mca_base_component_list_item_t *cli; - orte_pls_base_component_t *component; - orte_pls_base_module_t *module; - orte_pls_base_cmp_t *cmp; - int priority; + orte_pls_base_component_t *component, *best_component = NULL; + orte_pls_base_module_t *module, *best_module = NULL; + int priority, best_priority = -1; /* Query all the opened components and see if they want to run */ - for (item = opal_list_get_first(&orte_pls_base.pls_opened); - opal_list_get_end(&orte_pls_base.pls_opened) != item; + for (item = opal_list_get_first(&orte_pls_base.available_components); + opal_list_get_end(&orte_pls_base.available_components) != item; item = opal_list_get_next(item)) { cli = (mca_base_component_list_item_t *) item; component = (orte_pls_base_component_t *) cli->cli_component; @@ -159,76 +60,45 @@ static orte_pls_base_module_t *select_any(void) to be considered for selection */ if (NULL != module) { - opal_output(orte_pls_base.pls_output, - "orte:base:open: component %s returns priority %d", - component->pls_version.mca_component_name, - priority); + /* If this is the best one, save it */ + if (priority > best_priority) { - cmp = OBJ_NEW(orte_pls_base_cmp_t); - cmp->component = component; - cmp->module = module; - cmp->priority = priority; + /* If there was a previous best one, finalize */ + if (NULL != best_module) { + best_module->finalize(); + } - opal_list_append(&orte_pls_base.pls_available, &cmp->super); - } else { - opal_output(orte_pls_base.pls_output, + /* Save the new best one */ + best_module = module; + best_component = component; + + /* update the best priority */ + best_priority = priority; + } else { + 
opal_output(orte_pls_base.pls_output, "orte:base:open: component %s does NOT want to be considered for selection", component->pls_version.mca_component_name); + if (NULL == module->finalize) { + opal_output(orte_pls_base.pls_output, + "It appears you are the victim of a stale library - please delete your installation lib directory and reinstall"); + } else { + module->finalize(); + } + } } } - /* If the list is empty, return NULL */ + /* If we didn't find one to select, barf */ - if (opal_list_is_empty(&orte_pls_base.pls_available)) { - opal_output(orte_pls_base.pls_output, - "orte:base:select: no components available!"); - return NULL; + if (NULL == best_component) { + return ORTE_ERROR; } - /* Sort the resulting available list in priority order */ + /* We have happiness -- save the component and module for later + usage */ - opal_list_sort(&orte_pls_base.pls_available, compare); + orte_pls = *best_module; + orte_pls_base.selected_component = *best_component; - /* Otherwise, return the first item (it's already sorted in - priority order) */ - - item = opal_list_get_first(&orte_pls_base.pls_available); - cmp = (orte_pls_base_cmp_t *) item; - opal_output(orte_pls_base.pls_output, - "orte:base:select: highest priority component: %s", - cmp->component->pls_version.mca_component_name); - return cmp->module; -} - - -/* - * Need to make this an *opposite* compare (this is invoked by qsort) - * so that we get the highest priority first (i.e., so the sort is - * highest->lowest, not lowest->highest) - */ -static int compare(opal_list_item_t **a, opal_list_item_t **b) -{ - orte_pls_base_cmp_t *aa = *((orte_pls_base_cmp_t **) a); - orte_pls_base_cmp_t *bb = *((orte_pls_base_cmp_t **) b); - - if (bb->priority > aa->priority) { - return 1; - } else if (bb->priority == aa->priority) { - return 0; - } else { - return -1; - } -} - -static void cmp_constructor(orte_pls_base_cmp_t *cmp) -{ - cmp->component = NULL; - cmp->module = NULL; - cmp->priority = -1; -} - - -static void 
cmp_destructor(orte_pls_base_cmp_t *cmp) -{ - cmp_constructor(cmp); + return ORTE_SUCCESS; } diff --git a/orte/mca/pls/base/pls_private.h b/orte/mca/pls/base/pls_private.h new file mode 100644 index 0000000000..427f75267e --- /dev/null +++ b/orte/mca/pls/base/pls_private.h @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +/** @file: + */ + +#ifndef MCA_PLS_PRIVATE_H +#define MCA_PLS_PRIVATE_H + +/* + * includes + */ +#include "orte_config.h" + +#include "opal/class/opal_list.h" + +#include "orte/dss/dss_types.h" +#include "orte/mca/gpr/gpr_types.h" +#include "orte/mca/ns/ns_types.h" +#include "orte/mca/ras/ras_types.h" +#include "orte/mca/rmgr/rmgr_types.h" +#include "orte/mca/rml/rml_types.h" + + +#if defined(c_plusplus) || defined(__cplusplus) +extern "C" { +#endif + + /* + * pls proxy commands + */ + typedef uint8_t orte_pls_cmd_flag_t; + #define ORTE_PLS_CMD ORTE_UINT8 + #define ORTE_PLS_LAUNCH_JOB_CMD 1 + #define ORTE_PLS_TERMINATE_JOB_CMD 2 + #define ORTE_PLS_TERMINATE_PROC_CMD 3 + #define ORTE_PLS_SIGNAL_JOB_CMD 4 + #define ORTE_PLS_SIGNAL_PROC_CMD 5 + #define ORTE_PLS_TERMINATE_ORTEDS_CMD 6 + + /* + * object for daemon information + */ + typedef struct orte_pls_daemon_info_t { + opal_list_item_t super; + orte_cellid_t cell; + char *nodename; + orte_process_name_t *name; + orte_jobid_t active_job; + } orte_pls_daemon_info_t; + OBJ_CLASS_DECLARATION(orte_pls_daemon_info_t); + + + /** + * Utility routine to 
set progress engine schedule + */ + ORTE_DECLSPEC int orte_pls_base_set_progress_sched(int sched); + + + /** + * Utilities for pls components that use proxy daemons + */ + int orte_pls_base_orted_exit(opal_list_t *daemons); + int orte_pls_base_orted_kill_local_procs(opal_list_t *daemons, orte_jobid_t job); + int orte_pls_base_orted_signal_local_procs(opal_list_t *daemons, int32_t signal); + int orte_pls_base_orted_add_local_procs(opal_list_t *daemons, orte_gpr_notify_data_t *ndat); + + int orte_pls_base_get_active_daemons(opal_list_t *daemons, orte_jobid_t job); + int orte_pls_base_store_active_daemons(opal_list_t *daemons, orte_jobid_t job); + + /* + * communications utilities + */ + int orte_pls_base_comm_start(void); + int orte_pls_base_comm_stop(void); + void orte_pls_base_recv(int status, orte_process_name_t* sender, + orte_buffer_t* buffer, orte_rml_tag_t tag, + void* cbdata); + + /* + * general utilities + */ + int orte_pls_base_mca_argv(int *argc, char ***argv); + +#if defined(c_plusplus) || defined(__cplusplus) +} +#endif +#endif diff --git a/orte/mca/pls/bproc/Makefile.am b/orte/mca/pls/bproc/Makefile.am index 0b402ead72..9e38e6d040 100644 --- a/orte/mca/pls/bproc/Makefile.am +++ b/orte/mca/pls/bproc/Makefile.am @@ -33,6 +33,7 @@ endif sources = \ pls_bproc.h \ pls_bproc.c \ + pls_bproc_state.c \ pls_bproc_component.c mcacomponentdir = $(libdir)/openmpi diff --git a/orte/mca/pls/bproc/pls_bproc.c b/orte/mca/pls/bproc/pls_bproc.c index 11344d6c30..66a3ef0888 100644 --- a/orte/mca/pls/bproc/pls_bproc.c +++ b/orte/mca/pls/bproc/pls_bproc.c @@ -48,6 +48,7 @@ #include "opal/util/output.h" #include "opal/util/opal_environ.h" #include "opal/util/path.h" +#include "opal/util/os_path.h" #include "opal/util/show_help.h" #include "orte/dss/dss.h" @@ -55,18 +56,21 @@ #include "orte/mca/errmgr/errmgr.h" #include "orte/mca/iof/iof.h" #include "orte/mca/gpr/gpr.h" -#include "orte/mca/ns/base/base.h" +#include "orte/mca/ns/ns.h" #include "orte/mca/sds/base/base.h" 
#include "orte/mca/oob/base/base.h" #include "orte/mca/ras/base/base.h" -#include "orte/mca/rmgr/base/base.h" -#include "orte/mca/rmaps/base/base.h" -#include "orte/mca/rmaps/base/rmaps_base_map.h" +#include "orte/mca/rmgr/rmgr.h" +#include "orte/mca/rmaps/rmaps_types.h" #include "orte/mca/rml/rml.h" #include "orte/mca/smr/smr.h" #include "orte/runtime/orte_wait.h" #include "orte/runtime/runtime.h" +/* remove this when moved to 2.0 */ +#include "orte/mca/rmaps/base/rmaps_private.h" + +#include "orte/mca/pls/base/pls_private.h" #include "pls_bproc.h" /** @@ -91,6 +95,7 @@ orte_pls_base_module_t orte_pls_bproc_module = { orte_pls_bproc_launch, #endif orte_pls_bproc_terminate_job, + orte_pls_bproc_terminate_orteds, orte_pls_bproc_terminate_proc, orte_pls_bproc_signal_job, orte_pls_bproc_signal_proc, @@ -226,13 +231,13 @@ static int orte_pls_bproc_setup_io(orte_jobid_t jobid, struct bproc_io_t * io, return ORTE_ERROR; } - rc = orte_ns_base_convert_jobid_to_string(&job, jobid); + rc = orte_ns.convert_jobid_to_string(&job, jobid); if(ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); goto cleanup; } /* build the directory tree the io files will be in */ - if (0 > asprintf(&path, OPAL_PATH_SEP"tmp"OPAL_PATH_SEP"openmpi-bproc-%s"OPAL_PATH_SEP"%s"OPAL_PATH_SEP"%s-%d"OPAL_PATH_SEP"%d", + if (0 > asprintf(&frontend, OPAL_PATH_SEP"tmp"OPAL_PATH_SEP"openmpi-bproc-%s"OPAL_PATH_SEP"%s"OPAL_PATH_SEP"%s-%d"OPAL_PATH_SEP"%d", orte_system_info.user, orte_universe_info.name, job, app_context, node_rank)) { rc = ORTE_ERR_OUT_OF_RESOURCE; @@ -278,8 +283,11 @@ static int orte_pls_bproc_setup_io(orte_jobid_t jobid, struct bproc_io_t * io, } /** - * Callback for orte_wait_cb. This function decrements the number of currently - * running processes, and when this hits 0 it kills all the daemons + * Callback for orte_wait_cb. This function ONLY gets called for + * normal termination, or termination caused by a signal. 
If the + * process abnormally terminates by other than a signal, we go through + * another function so it can tell us that it was abnormal. + * Bproc doesn't really let us do it through here. * @param wpid the process's pid * @param status tells why the process died * @param data a pointer to the process's name @@ -287,9 +295,10 @@ static int orte_pls_bproc_setup_io(orte_jobid_t jobid, struct bproc_io_t * io, static void orte_pls_bproc_waitpid_cb(pid_t wpid, int status, void *data) { orte_process_name_t * proc = (orte_process_name_t*) data; int rc; + /* set the state of this process */ if(WIFEXITED(status)) { - rc = orte_smr.set_proc_state(proc, ORTE_PROC_STATE_TERMINATED, status); + rc = orte_smr.set_proc_state(proc, ORTE_PROC_STATE_TERMINATED, status); } else { rc = orte_smr.set_proc_state(proc, ORTE_PROC_STATE_ABORTED, status); } @@ -297,14 +306,6 @@ static void orte_pls_bproc_waitpid_cb(pid_t wpid, int status, void *data) { ORTE_ERROR_LOG(rc); } free(proc); - - OPAL_THREAD_LOCK(&mca_pls_bproc_component.lock); - mca_pls_bproc_component.num_procs--; - if(0 < mca_pls_bproc_component.debug) { - opal_output(0, "in orte_pls_bproc_waitpid_cb, %d processes left\n", - mca_pls_bproc_component.num_procs); - } - OPAL_THREAD_UNLOCK(&mca_pls_bproc_component.lock); } /** @@ -317,7 +318,7 @@ static void orte_pls_bproc_waitpid_cb(pid_t wpid, int status, void *data) { static void orte_pls_bproc_waitpid_daemon_cb(pid_t wpid, int status, void *data) { if(!mca_pls_bproc_component.done_launching) { /* if a daemon exits before we are done launching the user apps we send a - * message to ourself so we will break out of the recieve loop and exit */ + * message to ourself so we will break out of the receive loop and exit */ orte_buffer_t ack; int rc; int src[4] = {-1, -1}; @@ -331,7 +332,7 @@ static void orte_pls_bproc_waitpid_daemon_cb(pid_t wpid, int status, void *data) if(ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); } - rc = mca_oob_send_packed(MCA_OOB_NAME_SELF, &ack, 
MCA_OOB_TAG_BPROC, 0); + rc = mca_oob_send_packed(ORTE_RML_NAME_SELF, &ack, ORTE_RML_TAG_BPROC, 0); if(0 > rc) { ORTE_ERROR_LOG(rc); } @@ -433,11 +434,6 @@ static void orte_pls_bproc_setup_env(char *** env) opal_setenv(var, orte_system_info.user, true, env); free(var); - /* tell the bootproxy to use the bproc_orted pls */ - var = mca_base_param_environ_variable("rmgr", "bootproxy", "pls"); - opal_setenv(var, "bproc_orted", true, env); - free(var); - /* gpr replica contact info */ if(NULL == orte_process_info.gpr_replica) { orte_ns.copy_process_name(&orte_process_info.gpr_replica, @@ -499,6 +495,14 @@ static int orte_pls_bproc_launch_daemons(orte_cellid_t cellid, char *** envp, orte_vpid_t daemon_vpid_start = 0; orte_std_cntr_t idx; struct stat buf; + opal_list_t daemons; + orte_pls_daemon_info_t *dmn; + opal_list_item_t *item; + + /* setup a list that will contain the info for all the daemons + * so we can store it on the registry when done + */ + OBJ_CONSTRUCT(&daemons, opal_list_t); /* find the length of the longest node array */ for(i = 0; i < num_contexts; i++) { @@ -526,13 +530,15 @@ static int orte_pls_bproc_launch_daemons(orte_cellid_t cellid, char *** envp, } /* allocate a range of vpids for the daemons */ - rc = orte_ns_base_get_jobid(&daemon_jobid, orte_process_info.my_name); + daemon_jobid = orte_process_info.my_name->jobid; + rc = orte_ns.reserve_range(daemon_jobid, num_daemons, &daemon_vpid_start); if(ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); goto cleanup; } - rc = orte_ns.reserve_range(daemon_jobid, num_daemons, &daemon_vpid_start); - if(ORTE_SUCCESS != rc) { + + /* setup the orted triggers for passing their launch info */ + if (ORTE_SUCCESS != (rc = orte_smr.init_orted_stage_gates(jobid, num_daemons, NULL, NULL))) { ORTE_ERROR_LOG(rc); goto cleanup; } @@ -646,12 +652,20 @@ static int orte_pls_bproc_launch_daemons(orte_cellid_t cellid, char *** envp, ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); goto cleanup; } - rc = orte_pls_base_set_node_pid(cellid, 
param, jobid, pids[i]); + rc = orte_pls_bproc_set_node_pid(cellid, param, jobid, pids[i]); if(ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); goto cleanup; } + dmn = OBJ_NEW(orte_pls_daemon_info_t); + orte_dss.copy((void**)&(dmn->name), proc_name, ORTE_NAME); + dmn->cell = cellid; + dmn->nodename = strdup(param); + dmn->active_job = jobid; + opal_list_append(&daemons, &dmn->super); + free(param); + rc = orte_wait_cb(pids[i], orte_pls_bproc_waitpid_daemon_cb, &daemon_list[i]); if(ORTE_SUCCESS != rc) { @@ -660,6 +674,10 @@ static int orte_pls_bproc_launch_daemons(orte_cellid_t cellid, char *** envp, } } } + /* store the daemon info */ + if (ORTE_SUCCESS != (rc = orte_pls_base_store_active_daemons(&daemons, jobid))) { + ORTE_ERROR_LOG(rc); + } *num_launched = num_daemons; cleanup: @@ -672,6 +690,11 @@ cleanup: if(NULL != orted_path) { free(orted_path); } + while (NULL != (item = opal_list_remove_first(&daemons))) { + OBJ_RELEASE(item); + } + OBJ_DESTRUCT(&daemons); + return rc; } @@ -940,7 +963,7 @@ static int orte_pls_bproc_launch_app(orte_cellid_t cellid, orte_jobid_t jobid, ORTE_ERROR_LOG(rc); goto cleanup; } - orte_pls_base_set_proc_pid(proc_name, pids[j]); + orte_pls_bproc_set_proc_pid(proc_name, pids[j]); if(ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); goto cleanup; @@ -1026,30 +1049,34 @@ int orte_pls_bproc_launch(orte_jobid_t jobid) { orte_std_cntr_t idx; char cwd_save[OMPI_PATH_MAX + 1]; + /* make sure the pls_bproc receive function has been started */ + if (ORTE_SUCCESS != (rc = orte_pls_bproc_comm_start())) { + ORTE_ERROR_LOG(rc); + return rc; + } + + /* save the current working directory */ if (NULL == getcwd(cwd_save, sizeof(cwd_save))) { ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); return ORTE_ERR_NOT_FOUND; } cwd_save[sizeof(cwd_save) - 1] = '\0'; + /* query for the application context and allocated nodes */ OBJ_CONSTRUCT(&mapping, opal_list_t); if(ORTE_SUCCESS != (rc = orte_rmaps_base_get_map(jobid, &mapping))) { ORTE_ERROR_LOG(rc); return rc; } - if(ORTE_SUCCESS != (rc 
= orte_rmaps_base_get_vpid_range(jobid, &vpid_start, - &vpid_range))) { + if(ORTE_SUCCESS != (rc = orte_rmgr.get_vpid_range(jobid, &vpid_start, + &vpid_range))) { ORTE_ERROR_LOG(rc); goto cleanup; } /* get the cellid */ - rc = orte_ns_base_get_cellid(&cellid, orte_process_info.my_name); - if(ORTE_SUCCESS != rc) { - ORTE_ERROR_LOG(rc); - goto cleanup; - } - + cellid = orte_process_info.my_name->cellid; + /* do a large lock so the processes will not decrement the process count * until we are done launching */ @@ -1064,13 +1091,13 @@ int orte_pls_bproc_launch(orte_jobid_t jobid) { /* Check that the cwd is sane. We have to chdir there in to check the executable, because the executable could have been specified as a relative path to the wdir */ - rc = orte_pls_base_check_context_cwd(context, true); + rc = orte_rmgr.check_context_cwd(context, true); if (ORTE_SUCCESS != rc) { goto cleanup; } /* Check that the app exists and is executable */ - rc = orte_pls_base_check_context_app(context); + rc = orte_rmgr.check_context_app(context); if (ORTE_SUCCESS != rc) { goto cleanup; } @@ -1151,7 +1178,7 @@ int orte_pls_bproc_launch(orte_jobid_t jobid) { for(j = 0; j < num_daemons; j++) { orte_buffer_t ack; OBJ_CONSTRUCT(&ack, orte_buffer_t); - rc = mca_oob_recv_packed(MCA_OOB_NAME_ANY, &ack, MCA_OOB_TAG_BPROC); + rc = mca_oob_recv_packed(ORTE_RML_NAME_ANY, &ack, ORTE_RML_TAG_BPROC); if(0 > rc) { ORTE_ERROR_LOG(rc); OBJ_DESTRUCT(&ack); @@ -1189,7 +1216,7 @@ int orte_pls_bproc_launch(orte_jobid_t jobid) { item = opal_list_get_next(item)) { map = (orte_rmaps_base_map_t*)item; - rc = orte_pls_base_check_context_cwd(map->app, true); + rc = orte_rmgr.check_context_cwd(map->app, true); if (ORTE_SUCCESS != rc) { goto cleanup; } @@ -1223,15 +1250,18 @@ cleanup: } /** - * Terminate all processes associated with this job - including - * daemons. 
- */ + * Terminate all processes associated with this job */ int orte_pls_bproc_terminate_job(orte_jobid_t jobid) { pid_t* pids; orte_std_cntr_t i, num_pids; int rc; + + if(0 < mca_pls_bproc_component.debug) { + opal_output(0, "orte_pls_bproc: terminating job %ld", jobid); + } + /* kill application process */ - if(ORTE_SUCCESS != (rc = orte_pls_base_get_proc_pids(jobid, &pids, &num_pids))) + if(ORTE_SUCCESS != (rc = orte_pls_bproc_get_proc_pids(jobid, &pids, &num_pids))) return rc; for(i=0; i +#include "opal/threads/condition.h" + +#include "orte/class/orte_pointer_array.h" +#include "orte/util/proc_info.h" + +#include "orte/mca/rml/rml_types.h" + +#include "orte/mca/pls/base/base.h" + #if defined(c_plusplus) || defined(__cplusplus) extern "C" { #endif @@ -68,9 +74,30 @@ int orte_pls_bproc_finalize(void); int orte_pls_bproc_launch(orte_jobid_t); int orte_pls_bproc_terminate_job(orte_jobid_t); int orte_pls_bproc_terminate_proc(const orte_process_name_t* proc_name); +int orte_pls_bproc_terminate_orteds(orte_jobid_t jobid); int orte_pls_bproc_signal_job(orte_jobid_t, int32_t); int orte_pls_bproc_signal_proc(const orte_process_name_t* proc_name, int32_t); +/* Utility routine to get/set process pid */ +ORTE_DECLSPEC int orte_pls_bproc_set_proc_pid(const orte_process_name_t*, pid_t); +ORTE_DECLSPEC int orte_pls_bproc_get_proc_pid(const orte_process_name_t*, pid_t*); +/** + * Utility routine to retreive all process pids w/in a specified job. 
+ */ +ORTE_DECLSPEC int orte_pls_bproc_get_proc_pids(orte_jobid_t jobid, pid_t** pids, orte_std_cntr_t* num_pids); +/** + * Utility routine to get/set daemon pid + */ +ORTE_DECLSPEC int orte_pls_bproc_set_node_pid(orte_cellid_t cellid, char* node_name, orte_jobid_t jobid, pid_t pid); +ORTE_DECLSPEC int orte_pls_bproc_get_node_pids(orte_jobid_t jobid, pid_t** pids, orte_std_cntr_t* num_pids); + +/* utility functions for abort communications */ +int orte_pls_bproc_comm_start(void); +int orte_pls_bproc_comm_stop(void); +void orte_pls_bproc_recv(int status, orte_process_name_t* sender, + orte_buffer_t* buffer, orte_rml_tag_t tag, + void* cbdata); + /** * PLS bproc Component */ @@ -107,6 +134,10 @@ struct orte_pls_bproc_component_t { /**< Indicates whether or not this application is to be mapped by node * (if set to true) or by slot (default) */ + bool recv_issued; + /**< Indicates that the comm recv for reporting abnormal proc termination + * has been issued + */ }; /** diff --git a/orte/mca/pls/bproc/pls_bproc_component.c b/orte/mca/pls/bproc/pls_bproc_component.c index fd360500e2..ae229d2513 100644 --- a/orte/mca/pls/bproc/pls_bproc_component.c +++ b/orte/mca/pls/bproc/pls_bproc_component.c @@ -34,7 +34,7 @@ orte_pls_bproc_component_t mca_pls_bproc_component = { { { - ORTE_PLS_BASE_VERSION_1_0_0, + ORTE_PLS_BASE_VERSION_1_3_0, "bproc", /* MCA component name */ ORTE_MAJOR_VERSION, /* MCA component major version */ ORTE_MINOR_VERSION, /* MCA component minor version */ @@ -72,6 +72,7 @@ int orte_pls_bproc_component_open(void) { mca_pls_bproc_component.num_procs = 0; mca_pls_bproc_component.num_daemons = 0; mca_pls_bproc_component.done_launching = false; + mca_pls_bproc_component.recv_issued = false; OBJ_CONSTRUCT(&mca_pls_bproc_component.lock, opal_mutex_t); OBJ_CONSTRUCT(&mca_pls_bproc_component.condition, opal_condition_t); @@ -120,7 +121,7 @@ orte_pls_base_module_t* orte_pls_bproc_init(int *priority) { if(orte_process_info.seed == false) return NULL; - /* okay, we 
are in a daemon - now check to see if BProc is running here */ + /* okay, we are in an HNP - now check to see if BProc is running here */ ret = bproc_version(&version); if (ret != 0) { return NULL; diff --git a/orte/mca/pls/base/pls_base_state.c b/orte/mca/pls/bproc/pls_bproc_state.c similarity index 77% rename from orte/mca/pls/base/pls_base_state.c rename to orte/mca/pls/bproc/pls_bproc_state.c index edf4996725..395c0df733 100644 --- a/orte/mca/pls/base/pls_base_state.c +++ b/orte/mca/pls/bproc/pls_bproc_state.c @@ -25,23 +25,23 @@ #include "opal/mca/base/base.h" #include "orte/dss/dss.h" -#include "orte/mca/pls/base/base.h" #include "orte/mca/ns/ns.h" #include "orte/mca/gpr/gpr.h" -#include "orte/mca/smr/smr_types.h" +#include "orte/mca/rml/rml.h" +#include "orte/mca/smr/smr.h" #include "orte/mca/errmgr/errmgr.h" #include "orte/mca/schema/schema.h" +#include "orte/mca/pls/bproc/pls_bproc.h" /** * Set the process pid in the job segment and indicate the state * as being launched. */ -int orte_pls_base_set_proc_pid(const orte_process_name_t *name, pid_t pid) +int orte_pls_bproc_set_proc_pid(const orte_process_name_t *name, pid_t pid) { orte_gpr_value_t *values[1]; - orte_proc_state_t proc_state = ORTE_PROC_STATE_LAUNCHED; char *segment; int rc; @@ -53,7 +53,7 @@ int orte_pls_base_set_proc_pid(const orte_process_name_t *name, pid_t pid) if (ORTE_SUCCESS != (rc = orte_gpr.create_value(&values[0], ORTE_GPR_OVERWRITE, segment, - 2, 0))) { + 1, 0))) { ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); free(segment); return ORTE_ERR_OUT_OF_RESOURCE; @@ -73,13 +73,6 @@ int orte_pls_base_set_proc_pid(const orte_process_name_t *name, pid_t pid) return rc; } - if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(values[0]->keyvals[1]), ORTE_PROC_STATE_KEY, ORTE_PROC_STATE, &proc_state))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(values[0]); - free(segment); - return rc; - } - rc = orte_gpr.put(1, values); if(ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); @@ -90,33 +83,17 @@ int 
orte_pls_base_set_proc_pid(const orte_process_name_t *name, pid_t pid) OBJ_RELEASE(values[0]); - /** now increment the LAUNCHED counter so that the LAUNCHED trigger can fire! */ - if (ORTE_SUCCESS != (rc = orte_gpr.create_value(&values[0], - ORTE_GPR_OVERWRITE, segment, 1, 1))) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - free(segment); - return ORTE_ERR_OUT_OF_RESOURCE; - } - free(segment); /** done with this now */ - if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(values[0]->keyvals[0]), ORTE_PROC_NUM_LAUNCHED, ORTE_UNDEF, NULL))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(values[0]); - return rc; - } - values[0]->tokens[0] = strdup(ORTE_JOB_GLOBALS); /* counter is in the job's globals container */ - - if (ORTE_SUCCESS != (rc = orte_gpr.increment_value(values[0]))) { + /* set the process state to LAUNCHED */ + if (ORTE_SUCCESS != (rc = orte_smr.set_proc_state((orte_process_name_t*)name, ORTE_PROC_STATE_LAUNCHED, 0))) { ORTE_ERROR_LOG(rc); } - OBJ_RELEASE(values[0]); - return rc; } /** * Retreive a specified process pid from the registry. */ -int orte_pls_base_get_proc_pid(const orte_process_name_t* name, pid_t* pid) +int orte_pls_bproc_get_proc_pid(const orte_process_name_t* name, pid_t* pid) { char *segment; char **tokens; @@ -188,7 +165,7 @@ cleanup: /** * Retrieve all process pids for the specified job. */ -int orte_pls_base_get_proc_pids(orte_jobid_t jobid, pid_t **pids, orte_std_cntr_t* num_pids) +int orte_pls_bproc_get_proc_pids(orte_jobid_t jobid, pid_t **pids, orte_std_cntr_t* num_pids) { char *segment; char *keys[2]; @@ -252,7 +229,7 @@ cleanup: * the daemons. 
*/ -int orte_pls_base_set_node_pid(orte_cellid_t cellid, char* node_name, orte_jobid_t jobid, pid_t pid) +int orte_pls_bproc_set_node_pid(orte_cellid_t cellid, char* node_name, orte_jobid_t jobid, pid_t pid) { orte_gpr_value_t *values[1]; char *jobid_string, *key; @@ -300,9 +277,9 @@ int orte_pls_base_set_node_pid(orte_cellid_t cellid, char* node_name, orte_jobid /** - * Retreive all daemon pids for the specified job. + * Retrieve all daemon pids for the specified job. */ -int orte_pls_base_get_node_pids(orte_jobid_t jobid, pid_t **pids, orte_std_cntr_t* num_pids) +int orte_pls_bproc_get_node_pids(orte_jobid_t jobid, pid_t **pids, orte_std_cntr_t* num_pids) { char *keys[2]; orte_gpr_value_t** values = NULL; @@ -355,4 +332,63 @@ cleanup: return rc; } +/* + * FUNCTIONS FOR DEALING WITH ABNORMAL TERMINATION OF BPROC + * APPLICATION PROCESSES + */ +int orte_pls_bproc_comm_start(void) +{ + int rc; + + if (mca_pls_bproc_component.recv_issued) { + return ORTE_SUCCESS; + } + + if (ORTE_SUCCESS != (rc = orte_rml.recv_buffer_nb(ORTE_RML_NAME_ANY, + ORTE_RML_TAG_BPROC_ABORT, + ORTE_RML_PERSISTENT, + orte_pls_bproc_recv, + NULL))) { + ORTE_ERROR_LOG(rc); + } + mca_pls_bproc_component.recv_issued = true; + + return rc; +} + + +int orte_pls_bproc_comm_stop(void) +{ + int rc; + + if (!mca_pls_bproc_component.recv_issued) { + return ORTE_SUCCESS; + } + + if (ORTE_SUCCESS != (rc = orte_rml.recv_cancel(ORTE_RML_NAME_ANY, ORTE_RML_TAG_BPROC_ABORT))) { + ORTE_ERROR_LOG(rc); + } + mca_pls_bproc_component.recv_issued = false; + + return rc; +} + + +/* + * handle message from proxies + * NOTE: The incoming buffer "buffer" is OBJ_RELEASED by the calling program. 
+ * DO NOT RELEASE THIS BUFFER IN THIS CODE + */ + +void orte_pls_bproc_recv(int status, orte_process_name_t* sender, + orte_buffer_t* buffer, orte_rml_tag_t tag, + void* cbdata) +{ + int rc; + + /* we don't care what was in the buffer - just set the state of the sender to ABORTED */ + if (ORTE_SUCCESS != (rc = orte_smr.set_proc_state(sender, ORTE_PROC_STATE_ABORTED, 0))) { + ORTE_ERROR_LOG(rc); + } + } diff --git a/orte/mca/pls/bproc_orted/pls_bproc_orted.h b/orte/mca/pls/bproc_orted/pls_bproc_orted.h deleted file mode 100644 index 9515badd2a..0000000000 --- a/orte/mca/pls/bproc_orted/pls_bproc_orted.h +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file: - * Part of the bproc launching system. This launching system is broken into 2 - * parts: pls_bproc and pls_bproc_orted. pls_bproc runs on the head node in the - * seed daemons and pls_bproc_orted runs on the remote nodes in the daemon. - * - * The main job of pls_bproc_orted is to setup ptys/pipes for IO forwarding. - * See pls_bproc.h for an overview of how the entire bproc launching system works. 
- */ -#ifndef ORTE_PLS_BPROC_ORTED_H_ -#define ORTE_PLS_BPROC_ORTED_H_ - -#include "orte_config.h" -#include "opal/mca/mca.h" -#include "opal/threads/condition.h" -#include "orte/mca/pls/pls.h" -#include - -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif - -/* - * Module open / close - */ -int orte_pls_bproc_orted_component_open(void); -int orte_pls_bproc_orted_component_close(void); -orte_pls_base_module_t* orte_pls_bproc_orted_init(int *priority); - -/* - * Startup / Shutdown - */ -int orte_pls_bproc_orted_finalize(void); - -/* - * Interface - */ -int orte_pls_bproc_orted_launch(orte_jobid_t); -int orte_pls_bproc_orted_terminate_job(orte_jobid_t); -int orte_pls_bproc_orted_terminate_proc(const orte_process_name_t* proc_name); -int orte_pls_bproc_orted_signal_job(orte_jobid_t, int32_t); -int orte_pls_bproc_orted_signal_proc(const orte_process_name_t* proc_name, int32_t); - -/** - * PLS bproc_orted component - */ -struct orte_pls_bproc_orted_component_t { - orte_pls_base_component_t super; - /**< The base class */ - int debug; - /**< If greater than 0 print debugging information */ - int priority; - /**< The priority of this component. 
This will be returned if we determine - * that bproc is available and running on this node, */ - opal_mutex_t lock; - /**< Lock used to prevent some race conditions */ -}; -/** - * Convenience typedef - */ -typedef struct orte_pls_bproc_orted_component_t orte_pls_bproc_orted_component_t; - -ORTE_DECLSPEC orte_pls_bproc_orted_component_t mca_pls_bproc_orted_component; -ORTE_DECLSPEC orte_pls_base_module_t orte_pls_bproc_orted_module; - -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif -#endif /* ORTE_PLS_BPROC_ORTED_H_ */ - diff --git a/orte/mca/pls/cnos/Makefile.am b/orte/mca/pls/cnos/Makefile.am new file mode 100644 index 0000000000..8e4f7d0b8d --- /dev/null +++ b/orte/mca/pls/cnos/Makefile.am @@ -0,0 +1,49 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# Use the top-level Makefile.options + + + +# Make the output library in this directory, and name it either +# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la +# (for static builds).
+ +if OMPI_BUILD_rmgr_cnos_DSO +component_noinst = +component_install = mca_rmgr_cnos.la +else +component_noinst = libmca_rmgr_cnos.la +component_install = +endif + +cnos_SOURCES = \ + rmgr_cnos.c \ + rmgr_cnos.h \ + rmgr_cnos_component.c + +mcacomponentdir = $(libdir)/openmpi +mcacomponent_LTLIBRARIES = $(component_install) +mca_rmgr_cnos_la_SOURCES = $(cnos_SOURCES) +mca_rmgr_cnos_la_LIBADD = \ + $(top_ompi_builddir)/orte/liborte.la \ + $(top_ompi_builddir)/opal/libopal.la +mca_rmgr_cnos_la_LDFLAGS = -module -avoid-version + +noinst_LTLIBRARIES = $(component_noinst) +libmca_rmgr_cnos_la_SOURCES = $(cnos_SOURCES) +libmca_rmgr_cnos_la_LIBADD = +libmca_rmgr_cnos_la_LDFLAGS = -module -avoid-version diff --git a/orte/mca/pls/cnos/configure.m4 b/orte/mca/pls/cnos/configure.m4 new file mode 100644 index 0000000000..39bc3e7e58 --- /dev/null +++ b/orte/mca/pls/cnos/configure.m4 @@ -0,0 +1,37 @@ +# -*- shell-script -*- +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# MCA_pls_cnos_CONFIG([action-if-found], [action-if-not-found]) +# ----------------------------------------------------------- +AC_DEFUN([MCA_pls_cnos_CONFIG],[ + pls_cnos_happy="no" + # see if we should enable super secret utcp support + if test "$with_pls_cnos" = "utcp" ; then + pls_cnos_happy="yes" + else + # check for cnos functions + AC_CHECK_FUNC([cnos_barrier], + [pls_cnos_happy="yes"], + [pls_cnos_happy="no"]) + fi + + AC_CHECK_FUNCS([killrank cnos_pm_barrier]) + + AS_IF([test "$pls_cnos_happy" = "yes"], [$1], [$2]) +])dnl diff --git a/orte/mca/pls/cnos/configure.params b/orte/mca/pls/cnos/configure.params new file mode 100644 index 0000000000..a597d81a3b --- /dev/null +++ b/orte/mca/pls/cnos/configure.params @@ -0,0 +1,21 @@ +# -*- shell-script -*- +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# Specific to this module + +PARAM_INIT_FILE=pls_cnos.c +PARAM_CONFIG_FILES="Makefile" diff --git a/orte/mca/pls/cnos/pls_cnos.c b/orte/mca/pls/cnos/pls_cnos.c new file mode 100644 index 0000000000..6e8bd8611d --- /dev/null +++ b/orte/mca/pls/cnos/pls_cnos.c @@ -0,0 +1,152 @@ +/* + * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. 
All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +#include "orte_config.h" + +#include +#include +#ifdef HAVE_UNISTD_H +#include +#endif /* HAVE_UNISTD_H */ +#ifdef HAVE_STRING_H +#include +#endif /* HAVE_STRING_H */ +#ifdef HAVE_SIGNAL_H +#include +#endif +#ifdef HAVE_CNOS_PM_BARRIER +#include +#endif + +#include "orte/orte_constants.h" +#include "orte/mca/rmgr/base/base.h" +#include "orte/mca/ns/ns.h" +#include "orte/util/proc_info.h" +#include "pls_cnos.h" + + +static int orte_pls_cnos_launch_job(orte_jobid_t jobid); +static int orte_pls_cnos_terminate_job(orte_jobid_t jobid); +static int orte_pls_cnos_terminate_orteds(orte_jobid_t jobid); +static int orte_pls_cnos_terminate_proc(const orte_process_name_t* proc_name); +static int orte_pls_cnos_signal_job(orte_jobid_t jobid, int32_t signal); +static int orte_pls_cnos_signal_proc(const orte_process_name_t* proc_name, int32_t signal); +static int orte_pls_cnos_finalize(void); + + +orte_pls_base_module_t orte_pls_cnos_module = { + orte_pls_cnos_launch_job, + orte_pls_cnos_terminate_job, + orte_pls_cnos_terminate_orteds, + orte_pls_cnos_terminate_proc, + orte_pls_cnos_signal_job, + orte_pls_cnos_signal_proc, + orte_pls_cnos_finalize +}; + + +static int orte_pls_cnos_launch_job(orte_jobid_t jobid) +{ + return ORTE_ERR_NOT_SUPPORTED; +} + +#ifdef HAVE_KILLRANK +#include "catamount/types.h" +/* secret sauce on the Cray machine */ +extern int killrank(rank_t RANK, int SIG); +#endif + +static int orte_pls_cnos_terminate_job(orte_jobid_t jobid) +{ +#ifdef HAVE_KILLRANK + orte_jobid_t my_jobid; + + orte_ns.get_jobid(&my_jobid, orte_process_info.my_name); + + /* make sure it's my job */ + if (jobid == my_jobid) { + killrank(-1, SIGKILL); + } else { + return ORTE_ERR_NOT_SUPPORTED; + } +#else + exit(0); +#endif + + return ORTE_SUCCESS; +} + + +static int orte_pls_cnos_terminate_orteds(orte_jobid_t jobid) +{ +#ifdef HAVE_KILLRANK + orte_jobid_t my_jobid; + + 
orte_ns.get_jobid(&my_jobid, orte_process_info.my_name); + + /* make sure it's my job */ + if (jobid == my_jobid) { + killrank(-1, SIGKILL); + } else { + return ORTE_ERR_NOT_SUPPORTED; + } +#else + exit(0); +#endif + + return ORTE_SUCCESS; +} + +static int orte_pls_cnos_terminate_proc(const orte_process_name_t* proc_name) +{ +#ifdef HAVE_KILLRANK + orte_jobid_t my_jobid; + orte_jobid_t his_jobid; + orte_vpid_t his_vpid; + + orte_ns.get_jobid(&my_jobid, orte_process_info.my_name); + orte_ns.get_jobid(&his_jobid, proc_name); + + orte_ns.get_vpid(&his_vpid, proc_name); + + /* make sure it's my job. This may end up killing me, but what + the heck. */ + if (his_jobid == my_jobid) { + killrank((int) his_vpid, SIGKILL); + } else { + return ORTE_ERR_NOT_SUPPORTED; + } +#else + exit(0); +#endif + + return ORTE_SUCCESS; +} + + +static int orte_pls_cnos_signal_job(orte_jobid_t jobid, int32_t signal) +{ + return ORTE_ERR_NOT_SUPPORTED; +} + +static int orte_pls_cnos_signal_proc(const orte_process_name_t* proc_name, int32_t signal) +{ + return ORTE_ERR_NOT_SUPPORTED; +} + +static int orte_pls_cnos_finalize(void) +{ + return ORTE_SUCCESS; +} diff --git a/orte/mca/pls/cnos/pls_cnos.h b/orte/mca/pls/cnos/pls_cnos.h new file mode 100644 index 0000000000..d384907967 --- /dev/null +++ b/orte/mca/pls/cnos/pls_cnos.h @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +/** + * @file + * + */ +#ifndef ORTE_PLS_CNOS_H +#define ORTE_PLS_CNOS_H + +#include "orte/mca/pls/pls.h" + +#if defined(c_plusplus) || defined(__cplusplus) +extern "C" { +#endif + +/** Global PLS component */ +ORTE_DECLSPEC extern orte_pls_base_component_t mca_pls_cnos_component; +/** Global PLS module */ +extern orte_pls_base_module_t orte_pls_cnos_module; + +#if defined(c_plusplus) || defined(__cplusplus) +} +#endif + +#endif diff --git a/orte/mca/pls/cnos/pls_cnos_component.c b/orte/mca/pls/cnos/pls_cnos_component.c new file mode 100644 index 0000000000..7c1aa5f643 --- /dev/null +++ b/orte/mca/pls/cnos/pls_cnos_component.c @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "orte_config.h" + +#ifdef HAVE_CNOS_PM_BARRIER +#include +#endif + +#include "orte/orte_constants.h" +#include "orte/util/proc_info.h" +#include "opal/util/output.h" +#include "orte/mca/errmgr/errmgr.h" + +#include "orte/mca/rds/base/base.h" +#include "orte/mca/ras/base/base.h" +#include "orte/mca/rmaps/base/base.h" +#include "orte/mca/pls/base/base.h" +#include "pls_cnos.h" + +/* + * Local functions + */ + +static int orte_pls_cnos_open(void); +static int orte_pls_cnos_close(void); +static orte_pls_base_module_t* orte_pls_cnos_init(int *priority); + + +orte_pls_base_component_t mca_pls_cnos_component = { + /* First, the mca_base_component_t struct containing meta + information about the component itself */ + + { + /* Indicate that we are a pls v1.3.0 component (which also + implies a specific MCA version) */ + + ORTE_PLS_BASE_VERSION_1_3_0, + + "cnos", /* MCA component name */ + ORTE_MAJOR_VERSION, /* MCA component major version */ + ORTE_MINOR_VERSION, /* MCA component minor version */ + ORTE_RELEASE_VERSION, /* MCA component release version */ + orte_pls_cnos_open, /* component open */ + orte_pls_cnos_close /* component close */ + }, + + /* Next the MCA v1.0.0 component meta data */ + { + /* Whether the component is checkpointable or not */ + false + }, + + orte_pls_cnos_init +}; + + +/** + * component open/close/init function + */ +static int orte_pls_cnos_open(void) +{ + return ORTE_SUCCESS; +} + + +static orte_rmgr_base_module_t *orte_pls_cnos_init(int* priority) +{ + /* if we can build, then we need to be selected, so + * set a priority higher than the proxy component + */ + *priority = 100; + + return &orte_pls_cnos_module; +} + + +/** + * Close all subsystems.
+ */ +static int orte_pls_cnos_close(void) +{ + return ORTE_SUCCESS; +} diff --git a/orte/mca/pls/fork/pls_fork.h b/orte/mca/pls/fork/pls_fork.h deleted file mode 100644 index 147ee66b78..0000000000 --- a/orte/mca/pls/fork/pls_fork.h +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef ORTE_PLS_FORK_EXPORT_H -#define ORTE_PLS_FORK_EXPORT_H - -#include "orte_config.h" - -#include "opal/threads/condition.h" -#include "opal/mca/mca.h" -#include "orte/mca/pls/pls.h" - -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif - -/* - * Module open / close - */ -int orte_pls_fork_component_open(void); -int orte_pls_fork_component_close(void); -orte_pls_base_module_t* orte_pls_fork_component_init(int *priority); - -/* - * Startup / Shutdown - */ -int orte_pls_fork_finalize(void); - - -/* - * Interface - */ -int orte_pls_fork_launch(orte_jobid_t); -int orte_pls_fork_terminate_job(orte_jobid_t); -int orte_pls_fork_terminate_proc(const orte_process_name_t* proc_name); -int orte_pls_fork_signal_job(orte_jobid_t, int32_t); -int orte_pls_fork_signal_proc(const orte_process_name_t* proc_name, int32_t signal); - -/** - * PLS Component - */ -struct orte_pls_fork_component_t { - orte_pls_base_component_t super; - int debug; - int priority; - int reap; - int timeout_before_sigkill; - int num_children; - opal_mutex_t lock; - opal_condition_t cond; -}; -typedef struct orte_pls_fork_component_t 
orte_pls_fork_component_t; - - -ORTE_DECLSPEC extern orte_pls_fork_component_t mca_pls_fork_component; -ORTE_DECLSPEC extern orte_pls_base_module_t orte_pls_fork_module; - - -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif -#endif /* ORTE_PLS_FORK_EXPORT_H */ diff --git a/orte/mca/pls/fork/pls_fork_component.c b/orte/mca/pls/fork/pls_fork_component.c deleted file mode 100644 index 5d42b42d4f..0000000000 --- a/orte/mca/pls/fork/pls_fork_component.c +++ /dev/null @@ -1,141 +0,0 @@ -/* - * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - * These symbols are in a file by themselves to provide nice linker - * semantics. Since linkers generally pull in symbols by object - * files, keeping these symbols as the only symbols in this file - * prevents utility programs such as "ompi_info" from having to import - * entire components just to query their version and parameters. 
- */ - -#include "orte_config.h" -#include -#ifdef HAVE_UNISTD_H -#include -#endif - -#include "opal/util/argv.h" -#include "opal/util/path.h" -#include "opal/mca/base/mca_base_param.h" -#include "orte/util/proc_info.h" -#include "orte/orte_constants.h" -#include "orte/mca/pls/pls.h" -#include "orte/mca/pls/fork/pls_fork.h" - - -/* - * Public string showing the pls ompi_fork component version number - */ -const char *mca_pls_fork_component_version_string = - "Open MPI fork pls MCA component version " ORTE_VERSION; - - -/* - * Instantiate the public struct with all of our public information - * and pointers to our public functions in it - */ - -orte_pls_fork_component_t mca_pls_fork_component = { - { - /* First, the mca_component_t struct containing meta information - about the component itself */ - - { - /* Indicate that we are a pls v1.0.0 component (which also - implies a specific MCA version) */ - - ORTE_PLS_BASE_VERSION_1_0_0, - - /* Component name and version */ - - "fork", - ORTE_MAJOR_VERSION, - ORTE_MINOR_VERSION, - ORTE_RELEASE_VERSION, - - /* Component open and close functions */ - - orte_pls_fork_component_open, - orte_pls_fork_component_close - }, - - /* Next the MCA v1.0.0 component meta data */ - - { - /* Whether the component is checkpointable or not */ - - true - }, - - /* Initialization / querying functions */ - - orte_pls_fork_component_init - } -}; - - - -int orte_pls_fork_component_open(void) -{ - mca_base_component_t *c = &mca_pls_fork_component.super.pls_version; - - /* initialize globals */ - OBJ_CONSTRUCT(&mca_pls_fork_component.lock, opal_mutex_t); - OBJ_CONSTRUCT(&mca_pls_fork_component.cond, opal_condition_t); - - /* lookup parameters */ - mca_base_param_reg_int(c, "reap", - "Whether to wait to reap all children before finalizing or not", - false, false, 1, &mca_pls_fork_component.reap); - mca_base_param_reg_int(c, "reap_timeout", - "When killing children processes, first send a SIGTERM, then wait at least this timeout (in seconds), then 
send a SIGKILL", - false, false, 0, &mca_pls_fork_component.timeout_before_sigkill); - mca_base_param_reg_int(c, "priority", - "Priority of this component", - false, false, 1, &mca_pls_fork_component.priority); - mca_base_param_reg_int(c, "debug", - "Whether to enable debugging output or not", - false, false, 0, &mca_pls_fork_component.debug); - if (mca_pls_fork_component.debug == 0) { - int id = mca_base_param_register_int("debug",NULL,NULL,NULL,0); - int value; - mca_base_param_lookup_int(id,&value); - mca_pls_fork_component.debug = (value > 0) ? 1 : 0; - } - return ORTE_SUCCESS; -} - - -orte_pls_base_module_t *orte_pls_fork_component_init(int *priority) -{ - /* Only return a module if we're in the orted */ - if (orte_process_info.daemon) { - *priority = mca_pls_fork_component.priority; - return &orte_pls_fork_module; - } else { - return NULL; - } -} - - -int orte_pls_fork_component_close(void) -{ - OBJ_DESTRUCT(&mca_pls_fork_component.lock); - OBJ_DESTRUCT(&mca_pls_fork_component.cond); - return ORTE_SUCCESS; -} - diff --git a/orte/mca/pls/fork/pls_fork_module.c b/orte/mca/pls/fork/pls_fork_module.c deleted file mode 100644 index a6dabc2181..0000000000 --- a/orte/mca/pls/fork/pls_fork_module.c +++ /dev/null @@ -1,871 +0,0 @@ -/* - * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - * These symbols are in a file by themselves to provide nice linker - * semantics. 
Since linkers generally pull in symbols by object - * files, keeping these symbols as the only symbols in this file - * prevents utility programs such as "ompi_info" from having to import - * entire components just to query their version and parameters. - */ - -#include "orte_config.h" - -#include -#ifdef HAVE_UNISTD_H -#include -#endif -#include -#include -#ifdef HAVE_SYS_WAIT_H -#include -#endif -#include -#ifdef HAVE_FCNTL_H -#include -#endif -#include -#ifdef HAVE_SYS_PARAM_H -#include -#endif -#ifdef HAVE_NETDB_H -#include -#endif - -#include "orte/orte_constants.h" -#include "opal/event/event.h" -#include "opal/util/argv.h" -#include "opal/util/output.h" -#include "opal/mca/paffinity/base/base.h" -#include "opal/util/show_help.h" -#include "opal/util/path.h" -#include "opal/class/opal_value_array.h" -#include "orte/util/sys_info.h" -#include "orte/util/univ_info.h" -#include "opal/util/opal_environ.h" -#include "orte/util/session_dir.h" -#include "orte/runtime/orte_wait.h" -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/errmgr/base/base.h" -#include "orte/mca/iof/iof.h" -#include "orte/mca/iof/base/iof_base_setup.h" -#include "opal/mca/base/mca_base_param.h" -#include "orte/mca/ns/ns.h" -#include "orte/mca/sds/base/base.h" -#include "orte/mca/pls/pls.h" -#include "orte/mca/pls/base/base.h" -#include "orte/mca/rml/rml.h" -#include "orte/mca/gpr/gpr.h" -#include "orte/mca/rmaps/base/base.h" -#include "orte/mca/rmaps/base/rmaps_base_map.h" -#include "orte/mca/smr/smr.h" -#include "orte/mca/pls/fork/pls_fork.h" - -#if !defined(__WINDOWS__) -extern char **environ; -#endif /* !defined(__WINDOWS__) */ - -#if OMPI_HAVE_POSIX_THREADS && OMPI_THREADS_HAVE_DIFFERENT_PIDS && OMPI_ENABLE_PROGRESS_THREADS -static int orte_pls_fork_launch_threaded(orte_jobid_t); -#endif - - -orte_pls_base_module_1_0_0_t orte_pls_fork_module = { -#if OMPI_HAVE_POSIX_THREADS && OMPI_THREADS_HAVE_DIFFERENT_PIDS && OMPI_ENABLE_PROGRESS_THREADS - orte_pls_fork_launch_threaded, -#else - 
orte_pls_fork_launch, -#endif - orte_pls_fork_terminate_job, - orte_pls_fork_terminate_proc, - orte_pls_fork_signal_job, - orte_pls_fork_signal_proc, - orte_pls_fork_finalize -}; - -static void set_handler_default(int sig); - - -static bool orte_pls_fork_child_died(pid_t pid, unsigned int timeout, int *exit_status) -{ - time_t end; - pid_t ret; - - end = time(NULL) + timeout; - do { - ret = waitpid(pid, exit_status, WNOHANG); - if (pid == ret) { - /* It died -- return success */ - return true; - } else if (-1 == ret && ECHILD == errno) { - /* The pid no longer exists, so we'll call this "good - enough for government work" */ - return true; - } - - /* Sleep for a second */ - sleep(1); - } while (time(NULL) < end); - - /* The child didn't die, so return false */ - return false; -} - -static void orte_pls_fork_kill_processes(opal_value_array_t *pids, opal_value_array_t *procs) -{ - size_t i; - pid_t pid; - orte_process_name_t proc; - int rc, exit_status; - - for (i = 0; i < opal_value_array_get_size(pids); ++i) { - pid = OPAL_VALUE_ARRAY_GET_ITEM(pids, pid_t, i); - - /* de-register the SIGCHILD callback for this pid */ - orte_wait_cb_cancel(pid); - - /* Send a sigterm to the process. If we get ESRCH back, that - means the process is already dead, so just proceed on to - the reaping of it. If we get any other error back, just - skip it and go on to the next process. */ - if (0 != kill(pid, SIGTERM) && ESRCH != errno) { - int err = errno; - char hostname[MAXHOSTNAMELEN]; - gethostname(hostname, sizeof(hostname)); - - opal_show_help("help-orte-pls-fork.txt", - "orte-pls-fork:could-not-send-kill", - true, hostname, pid, err); - - continue; - } - - /* The kill succeeded. Wait up to timeout_before_sigkill - seconds to see if it died. 
*/ - - if (!orte_pls_fork_child_died(pid, mca_pls_fork_component.timeout_before_sigkill, &exit_status)) { - kill(pid, SIGKILL); - /* Double check that it actually died */ - if (!orte_pls_fork_child_died(pid, mca_pls_fork_component.timeout_before_sigkill, &exit_status)) { - char hostname[MAXHOSTNAMELEN]; - gethostname(hostname, sizeof(hostname)); - - opal_show_help("help-orte-pls-fork.txt", - "orte-pls-fork:could-not-kill", - true, hostname, pid); - } - } - - /* update the process state on the registry */ - proc = OPAL_VALUE_ARRAY_GET_ITEM(procs, orte_process_name_t, i); - if (ORTE_SUCCESS != (rc = orte_smr.set_proc_state(&proc, ORTE_PROC_STATE_TERMINATED, exit_status))) { - ORTE_ERROR_LOG(rc); - /* don't exit out even if this didn't work - we still might need to kill more - * processes, so just keep trucking - */ - } - } - - /* Release any waiting threads from this process */ - OPAL_THREAD_LOCK(&mca_pls_fork_component.lock); - mca_pls_fork_component.num_children = 0; - opal_condition_signal(&mca_pls_fork_component.cond); - OPAL_THREAD_UNLOCK(&mca_pls_fork_component.lock); -} - -/* - * Wait for a callback indicating the child has completed. - */ - -static void orte_pls_fork_wait_proc(pid_t pid, int status, void* cbdata) -{ - orte_rmaps_base_proc_t* proc = (orte_rmaps_base_proc_t*)cbdata; - int rc; - - /* Clean up the session directory as if we were the process - itself. This covers the case where the process died abnormally - and didn't cleanup its own session directory. 
*/ - orte_session_dir_finalize(&proc->proc_name); - orte_iof.iof_flush(); - - /* set the state of this process */ - if(WIFEXITED(status)) { - rc = orte_smr.set_proc_state(&proc->proc_name, ORTE_PROC_STATE_TERMINATED, status); - } else { - rc = orte_smr.set_proc_state(&proc->proc_name, ORTE_PROC_STATE_ABORTED, status); - } - if(ORTE_SUCCESS != rc) { - ORTE_ERROR_LOG(rc); - } - OBJ_RELEASE(proc); - - /* release any waiting threads */ - OPAL_THREAD_LOCK(&mca_pls_fork_component.lock); - mca_pls_fork_component.num_children--; - opal_condition_signal(&mca_pls_fork_component.cond); - OPAL_THREAD_UNLOCK(&mca_pls_fork_component.lock); -} - -/** - * Fork/exec the specified processes - */ - -static int orte_pls_fork_proc( - orte_app_context_t* context, - orte_rmaps_base_proc_t* proc, - orte_vpid_t vpid_start, - orte_vpid_t vpid_range, - bool want_processor, - size_t processor) -{ - pid_t pid; - orte_iof_base_io_conf_t opts; - int rc; - sigset_t sigs; - orte_vpid_t vpid; - int i = 0, p[2]; - - /* should pull this information from MPIRUN instead of going with - default */ - opts.usepty = OMPI_ENABLE_PTY_SUPPORT; - - /* BWB - Fix post beta. Should setup stdin in orterun and - make part of the app_context */ - if (ORTE_SUCCESS == orte_ns.get_vpid(&vpid, &proc->proc_name) && - vpid == 0) { - opts.connect_stdin = true; - } else { - opts.connect_stdin = false; - } - - rc = orte_iof_base_setup_prefork(&opts); - if (ORTE_SUCCESS != rc) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - /* A pipe is used to communicate between the parent and child to - indicate whether the exec ultiimately succeeded or failed. The - child sets the pipe to be close-on-exec; the child only ever - writes anything to the pipe if there is an error (e.g., - executable not found, exec() fails, etc.). The parent does a - blocking read on the pipe; if the pipe closed with no data, - then the exec() succeeded. 
If the parent reads something from - the pipe, then the child was letting us know that it failed. */ - if (pipe(p) < 0) { - ORTE_ERROR_LOG(ORTE_ERR_IN_ERRNO); - return ORTE_ERR_IN_ERRNO; - } - - /* Fork off the child */ - pid = fork(); - if(pid < 0) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - if (pid == 0) { - char *param, *param2; - char *uri; - char **environ_copy; - long fd, fdmax = sysconf(_SC_OPEN_MAX); - - /* Setup the pipe to be close-on-exec */ - close(p[0]); - fcntl(p[1], F_SETFD, FD_CLOEXEC); - - /* setup stdout/stderr so that any error messages that we may - print out will get displayed back at orterun */ - orte_iof_base_setup_child(&opts); - - /* Try to change to the context cwd and check that the app - exists and is executable */ - if (ORTE_SUCCESS != orte_pls_base_check_context_cwd(context, true) || - ORTE_SUCCESS != orte_pls_base_check_context_app(context)) { - /* Tell the parent that Badness happened */ - write(p[1], &i, sizeof(int)); - exit(-1); - } - - /* setup base environment: copy the current environ and merge - in the app context environ */ - if (NULL != context->env) { - environ_copy = opal_environ_merge(environ, context->env); - } else { - environ_copy = opal_argv_copy(environ); - } - - /* special case handling for --prefix: this is somewhat icky, - but at least some users do this. :-\ It is possible that - when using --prefix, the user will also "-x PATH" and/or - "-x LD_LIBRARY_PATH", which would therefore clobber the - work that was done in the prior pls to ensure that we have - the prefix at the beginning of the PATH and - LD_LIBRARY_PATH. So examine the context->env and see if we - find PATH or LD_LIBRARY_PATH. If found, that means the - prior work was clobbered, and we need to re-prefix those - variables. 
*/ - for (i = 0; NULL != context->env && NULL != context->env[i]; ++i) { - char *newenv; - - /* Reset PATH */ - if (0 == strncmp("PATH=", context->env[i], 5)) { - asprintf(&newenv, "%s/bin:%s", - context->prefix_dir, context->env[i] + 5); - opal_setenv("PATH", newenv, true, &environ_copy); - free(newenv); - } - - /* Reset LD_LIBRARY_PATH */ - else if (0 == strncmp("LD_LIBRARY_PATH=", context->env[i], 16)) { - asprintf(&newenv, "%s/lib:%s", - context->prefix_dir, context->env[i] + 16); - opal_setenv("LD_LIBRARY_PATH", newenv, true, &environ_copy); - free(newenv); - } - } - - param = mca_base_param_environ_variable("rmgr","bootproxy","jobid"); - opal_unsetenv(param, &environ_copy); - free(param); - - /* Set the relative vpid */ - - if (want_processor) { - param = mca_base_param_environ_variable("mpi", NULL, - "paffinity_processor"); - asprintf(&param2, "%lu", (unsigned long) processor); - opal_setenv(param, param2, true, &environ_copy); - free(param); - free(param2); - } - - /* setup universe info */ - if (NULL != orte_universe_info.name) { - param = mca_base_param_environ_variable("universe", NULL, NULL); - asprintf(&uri, "%s@%s:%s", orte_universe_info.uid, - orte_universe_info.host, - orte_universe_info.name); - opal_setenv(param, uri, true, &environ_copy); - free(param); - free(uri); - } - - /* setup ns contact info */ - if(NULL != orte_process_info.ns_replica_uri) { - uri = strdup(orte_process_info.ns_replica_uri); - } else { - uri = orte_rml.get_uri(); - } - param = mca_base_param_environ_variable("ns","replica","uri"); - opal_setenv(param, uri, true, &environ_copy); - free(param); - free(uri); - - /* setup gpr contact info */ - if(NULL != orte_process_info.gpr_replica_uri) { - uri = strdup(orte_process_info.gpr_replica_uri); - } else { - uri = orte_rml.get_uri(); - } - param = mca_base_param_environ_variable("gpr","replica","uri"); - opal_setenv(param, uri, true, &environ_copy); - free(param); - free(uri); - - /* use same nodename as the starting daemon (us) */ - 
param = mca_base_param_environ_variable("orte", "base", "nodename"); - opal_setenv(param, orte_system_info.nodename, true, &environ_copy); - free(param); - - /* push name into environment */ - orte_ns_nds_env_put(&proc->proc_name, vpid_start, vpid_range, - &environ_copy); - - /* close all file descriptors w/ exception of stdin/stdout/stderr */ - for(fd=3; fdargv == NULL) { - context->argv = malloc(sizeof(char*)*2); - context->argv[0] = strdup(context->app); - context->argv[1] = NULL; - } - - /* Set signal handlers back to the default. Do this close to - the exev() because the event library may (and likely will) - reset them. If we don't do this, the event library may - have left some set that, at least on some OS's, don't get - reset via fork() or exec(). Hence, the launched process - could be unkillable (for example). */ - - set_handler_default(SIGTERM); - set_handler_default(SIGINT); - set_handler_default(SIGHUP); - set_handler_default(SIGPIPE); - set_handler_default(SIGCHLD); - - /* Unblock all signals, for many of the same reasons that we - set the default handlers, above. This is noticable on - Linux where the event library blocks SIGTERM, but we don't - want that blocked by the launched process. 
*/ - sigprocmask(0, 0, &sigs); - sigprocmask(SIG_UNBLOCK, &sigs, 0); - - /* Exec the new executable */ - - execve(context->app, context->argv, environ_copy); - opal_show_help("help-orte-pls-fork.txt", "orte-pls-fork:execv-error", - true, context->app, strerror(errno)); - exit(-1); - } else { - - /* connect endpoints IOF */ - rc = orte_iof_base_setup_parent(&proc->proc_name, &opts); - if(ORTE_SUCCESS != rc) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* Wait to read something from the pipe or close */ - close(p[1]); - while (1) { - rc = read(p[0], &i, sizeof(int)); - if (rc < 0) { - /* Signal interrupts are ok */ - if (errno == EINTR) { - continue; - } - /* Other errno's are bad */ - return ORTE_ERR_IN_ERRNO; - break; - } else if (0 == rc) { - /* Child was successful in exec'ing! */ - break; - } else { - /* Doh -- child failed. The child already printed a - suitable error message, so disable all - ORTE_ERROR_LOG reporting after this. Must also - report the failure to launch this process through - the SOH or else everyone else will hang. Don't bother - checking whether or not this worked - just fire and forget - */ - orte_smr.set_proc_state(&proc->proc_name, ORTE_PROC_STATE_ABORTED, rc); - return ORTE_ERR_FATAL; - break; - } - } - - /* save the pid in the registry */ - if (ORTE_SUCCESS != - (rc = orte_pls_base_set_proc_pid(&proc->proc_name, pid))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* wait for the child process - dont register for wait - * callback until after I/O is setup and the pid registered - - * otherwise can receive the wait callback before the above is - * ever completed - */ - OPAL_THREAD_LOCK(&mca_pls_fork_component.lock); - mca_pls_fork_component.num_children++; - OPAL_THREAD_UNLOCK(&mca_pls_fork_component.lock); - OBJ_RETAIN(proc); - orte_wait_cb(pid, orte_pls_fork_wait_proc, proc); - } - return ORTE_SUCCESS; -} - - -/** - * Launch all processes allocated to the current node. 
- */ - -int orte_pls_fork_launch(orte_jobid_t jobid) -{ - opal_list_t map; - opal_list_item_t* item; - orte_vpid_t vpid_start; - orte_vpid_t vpid_range; - int rc; - size_t num_processors, num_processes; - - /* query the allocation for this node */ - OBJ_CONSTRUCT(&map, opal_list_t); - rc = orte_rmaps_base_get_node_map( - orte_process_info.my_name->cellid,jobid,orte_system_info.nodename,&map); - if (ORTE_SUCCESS != rc) { - ORTE_ERROR_LOG(rc); - goto cleanup; - } - - rc = orte_rmaps_base_get_vpid_range(jobid, &vpid_start, &vpid_range); - if (ORTE_SUCCESS != rc) { - ORTE_ERROR_LOG(rc); - goto cleanup; - } - - /* are we oversubscribing? */ - opal_paffinity_base_get_num_processors(&rc); - num_processors = (size_t) rc; - for (num_processes = 0, item = opal_list_get_first(&map); - item != opal_list_get_end(&map); - item = opal_list_get_next(item)) { - orte_rmaps_base_map_t* map = (orte_rmaps_base_map_t*)item; - num_processes += map->num_procs; - } - - /* attempt to launch each of the apps */ - for (item = opal_list_get_first(&map); - item != opal_list_get_end(&map); - item = opal_list_get_next(item)) { - orte_rmaps_base_map_t* map = (orte_rmaps_base_map_t*)item; - orte_std_cntr_t i; - for (i=0; i<map->num_procs; i++) { - rc = orte_pls_fork_proc(map->app, map->procs[i], vpid_start, - vpid_range, - (num_processes > num_processors) ? - false : true, i); - - if (ORTE_SUCCESS != rc) { - /* Set the state of this process, and all remaining - processes to be launched to ABORTED. This will - cause the entire job to abort. */ - for (; i < map->num_procs; ++i) { - orte_smr.set_proc_state(&map->procs[i]->proc_name, - ORTE_PROC_STATE_ABORTED, 0); - } - - /* Propagate the error up the stack */ - ORTE_ERROR_LOG(rc); - goto cleanup; - } - } - } - -cleanup: - while(NULL != (item = opal_list_remove_first(&map))) { - OBJ_RELEASE(item); - } - OBJ_DESTRUCT(&map); - return rc; -} - -/** - * Query for all processes allocated to the job and terminate - * those on the current node. 
- */ - -int orte_pls_fork_terminate_job(orte_jobid_t jobid) -{ - /* query for the pids allocated on this node */ - char *segment; - char *keys[] = { - ORTE_PROC_NAME_KEY, - ORTE_NODE_NAME_KEY, - ORTE_PROC_PID_KEY, - NULL - }; - orte_gpr_value_t** values = NULL; - orte_std_cntr_t i, k, num_values = 0; - int rc; - opal_value_array_t pids, procs; - orte_process_name_t proc, *procptr; - - /* setup the pid array */ - OBJ_CONSTRUCT(&pids, opal_value_array_t); - opal_value_array_init(&pids, sizeof(pid_t)); - - /* setup the process name array */ - OBJ_CONSTRUCT(&procs, opal_value_array_t); - opal_value_array_init(&procs, sizeof(orte_process_name_t)); - - /* query the job segment on the registry */ - if(ORTE_SUCCESS != (rc = orte_schema.get_job_segment_name(&segment, jobid))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - rc = orte_gpr.get( - ORTE_GPR_KEYS_AND|ORTE_GPR_TOKENS_OR, - segment, - NULL, - keys, - &num_values, - &values - ); - if(rc != ORTE_SUCCESS) { - free(segment); - return rc; - } - - free(segment); /* done with this */ - - for(i=0; icnt; k++) { - orte_gpr_keyval_t* keyval = value->keyvals[k]; - if (strcmp(keyval->key, ORTE_NODE_NAME_KEY) == 0) { - if(orte_dss.compare(keyval->value->data, orte_system_info.nodename, ORTE_STRING) != ORTE_EQUAL) { - break; - } - } else if (strcmp(keyval->key, ORTE_PROC_PID_KEY) == 0) { - if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&pidptr, keyval->value, ORTE_PID))) { - ORTE_ERROR_LOG(rc); - return rc; - } - pid = *pidptr; - } else if (strcmp(keyval->key, ORTE_PROC_NAME_KEY) == 0) { - if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&procptr, keyval->value, ORTE_NAME))) { - ORTE_ERROR_LOG(rc); - return rc; - } - proc = *procptr; - } - } - if (0 != pid) { - opal_value_array_append_item(&pids, &pid); - opal_value_array_append_item(&procs, &proc); - } - OBJ_RELEASE(value); - } - - /* If we have processes to kill, go kill them */ - if (opal_value_array_get_size(&pids) > 0) { - orte_pls_fork_kill_processes(&pids, &procs); - } - 
OBJ_DESTRUCT(&pids); - OBJ_DESTRUCT(&procs); - - if(NULL != values) { - free(values); - } - free(segment); - return ORTE_SUCCESS; -} - - -int orte_pls_fork_terminate_proc(const orte_process_name_t* proc) -{ - return ORTE_ERR_NOT_IMPLEMENTED; -} - -/** - * Query for all processes allocated to the job and signal - * those on the current node. - */ - -int orte_pls_fork_signal_job(orte_jobid_t jobid, int32_t signal) -{ - /* query for the pids allocated on this node */ - char *segment; - char *keys[3]; - orte_gpr_value_t** values = NULL; - orte_std_cntr_t i, k, num_values = 0; - int rc; - opal_value_array_t pids; - pid_t pid; - - /* setup the pid array */ - OBJ_CONSTRUCT(&pids, opal_value_array_t); - opal_value_array_init(&pids, sizeof(pid_t)); - - /* query the job segment on the registry */ - if(ORTE_SUCCESS != (rc = orte_schema.get_job_segment_name(&segment, jobid))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - keys[0] = ORTE_NODE_NAME_KEY; - keys[1] = ORTE_PROC_PID_KEY; - keys[2] = NULL; - - rc = orte_gpr.get( - ORTE_GPR_KEYS_AND|ORTE_GPR_TOKENS_OR, - segment, - NULL, - keys, - &num_values, - &values - ); - if(rc != ORTE_SUCCESS) { - free(segment); - return rc; - } - - for(i=0; icnt; k++) { - orte_gpr_keyval_t* keyval = value->keyvals[k]; - if(strcmp(keyval->key, ORTE_NODE_NAME_KEY) == 0) { - if(orte_dss.compare(keyval->value->data, orte_system_info.nodename, ORTE_STRING) != ORTE_EQUAL) { - break; - } - } else if (strcmp(keyval->key, ORTE_PROC_PID_KEY) == 0) { - if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&pidptr, keyval->value, ORTE_PID))) { - ORTE_ERROR_LOG(rc); - free(segment); - return rc; - } - pid = *pidptr; - } - } - if (0 != pid) { - opal_value_array_append_item(&pids, &pid); - } - OBJ_RELEASE(value); - } - - rc = ORTE_SUCCESS; - /* If we have processes to signal, go signal them */ - for (i = 0; i < (orte_std_cntr_t)opal_value_array_get_size(&pids); ++i) { - pid = OPAL_VALUE_ARRAY_GET_ITEM(&pids, pid_t, i); - if(kill(pid, (int)signal) != 0) { - 
switch(errno) { - case EINVAL: - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - rc = ORTE_ERR_BAD_PARAM; - break; - case ESRCH: - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - rc = ORTE_ERR_NOT_FOUND; - break; - case EPERM: - ORTE_ERROR_LOG(ORTE_ERR_PERM); - rc = ORTE_ERR_PERM; - break; - default: - ORTE_ERROR_LOG(ORTE_ERROR); - rc = ORTE_ERROR; - } - } - } - - OBJ_DESTRUCT(&pids); - - if(NULL != values) { - free(values); - } - free(segment); - - return rc; -} - - -int orte_pls_fork_signal_proc(const orte_process_name_t* proc, int32_t signal) -{ - return ORTE_ERR_NOT_IMPLEMENTED; -} - -int orte_pls_fork_finalize(void) -{ - if(mca_pls_fork_component.reap) { - OPAL_THREAD_LOCK(&mca_pls_fork_component.lock); - while(mca_pls_fork_component.num_children > 0) { - opal_condition_wait(&mca_pls_fork_component.cond, - &mca_pls_fork_component.lock); - } - OPAL_THREAD_UNLOCK(&mca_pls_fork_component.lock); - } - return ORTE_SUCCESS; -} - - -/** - * Handle threading issues. - */ - -#if OMPI_HAVE_POSIX_THREADS && OMPI_THREADS_HAVE_DIFFERENT_PIDS && OMPI_ENABLE_PROGRESS_THREADS - -struct orte_pls_fork_stack_t { - opal_condition_t cond; - opal_mutex_t mutex; - bool complete; - orte_jobid_t jobid; - int rc; -}; -typedef struct orte_pls_fork_stack_t orte_pls_fork_stack_t; - -static void orte_pls_fork_stack_construct(orte_pls_fork_stack_t* stack) -{ - OBJ_CONSTRUCT(&stack->mutex, opal_mutex_t); - OBJ_CONSTRUCT(&stack->cond, opal_condition_t); - stack->rc = 0; - stack->complete = false; -} - -static void orte_pls_fork_stack_destruct(orte_pls_fork_stack_t* stack) -{ - OBJ_DESTRUCT(&stack->mutex); - OBJ_DESTRUCT(&stack->cond); -} - -static OBJ_CLASS_INSTANCE( - orte_pls_fork_stack_t, - opal_object_t, - orte_pls_fork_stack_construct, - orte_pls_fork_stack_destruct); - - -static void orte_pls_fork_launch_cb(int fd, short event, void* args) -{ - orte_pls_fork_stack_t *stack = (orte_pls_fork_stack_t*)args; - OPAL_THREAD_LOCK(&stack->mutex); - stack->rc = orte_pls_fork_launch(stack->jobid); - stack->complete 
= true; - opal_condition_signal(&stack->cond); - OPAL_THREAD_UNLOCK(&stack->mutex); -} - -static int orte_pls_fork_launch_threaded(orte_jobid_t jobid) -{ - - struct timeval tv = { 0, 0 }; - struct opal_event event; - struct orte_pls_fork_stack_t stack; - - OBJ_CONSTRUCT(&stack, orte_pls_fork_stack_t); - - stack.jobid = jobid; - opal_evtimer_set(&event, orte_pls_fork_launch_cb, &stack); - opal_evtimer_add(&event, &tv); - - OPAL_THREAD_LOCK(&stack.mutex); - while(false == stack.complete) { - opal_condition_wait(&stack.cond, &stack.mutex); - } - OPAL_THREAD_UNLOCK(&stack.mutex); - OBJ_DESTRUCT(&stack); - return stack.rc; -} - -#endif - - - -static void set_handler_default(int sig) -{ - struct sigaction act; - - act.sa_handler = SIG_DFL; - act.sa_flags = 0; - sigemptyset(&act.sa_mask); - - sigaction(sig, &act, (struct sigaction *)0); -} diff --git a/orte/mca/pls/gridengine/pls_gridengine.h b/orte/mca/pls/gridengine/pls_gridengine.h index f4634dea9c..efdfbd9ee5 100644 --- a/orte/mca/pls/gridengine/pls_gridengine.h +++ b/orte/mca/pls/gridengine/pls_gridengine.h @@ -112,8 +112,9 @@ int orte_pls_gridengine_finalize(void); /* * Interface */ -int orte_pls_gridengine_launch(orte_jobid_t); +int orte_pls_gridengine_launch_job(orte_jobid_t); int orte_pls_gridengine_terminate_job(orte_jobid_t); +int orte_pls_gridengine_terminate_orteds(orte_jobid_t); int orte_pls_gridengine_terminate_proc(const orte_process_name_t*); int orte_pls_gridengine_signal_job(orte_jobid_t, int32_t); int orte_pls_gridengine_signal_proc(const orte_process_name_t*, int32_t); diff --git a/orte/mca/pls/gridengine/pls_gridengine_component.c b/orte/mca/pls/gridengine/pls_gridengine_component.c index a250b09616..d6e62d58ac 100644 --- a/orte/mca/pls/gridengine/pls_gridengine_component.c +++ b/orte/mca/pls/gridengine/pls_gridengine_component.c @@ -31,15 +31,19 @@ #include "orte_config.h" #include "orte/orte_constants.h" -#include "orte/mca/pls/pls.h" -#include "pls_gridengine.h" + #include "opal/util/path.h" 
#include "opal/util/argv.h" -#include "opal/mca/base/mca_base_param.h" -#include "orte/mca/pls/base/base.h" #include "opal/util/output.h" -#include "orte/mca/ras/base/base.h" -#include "orte/mca/ras/base/ras_base_node.h" +#include "opal/mca/base/mca_base_param.h" + +#include "orte/util/proc_info.h" +#include "orte/mca/errmgr/errmgr.h" + +#include "orte/mca/pls/pls.h" +#include "orte/mca/pls/base/base.h" +#include "orte/mca/pls/base/pls_private.h" +#include "pls_gridengine.h" /** * Public string showing the pls ompi_gridengine component version number @@ -62,10 +66,10 @@ orte_pls_gridengine_component_t mca_pls_gridengine_component = { about the component itself */ { - /* Indicate that we are a pls v1.0.0 component (which also + /* Indicate that we are a pls v1.3.0 component (which also implies a specific MCA version) */ - ORTE_PLS_BASE_VERSION_1_0_0, + ORTE_PLS_BASE_VERSION_1_3_0, /* Component name and version */ @@ -137,10 +141,23 @@ orte_pls_gridengine_component_init - initialize component, check if we can run o */ orte_pls_base_module_t *orte_pls_gridengine_component_init(int *priority) { + int rc; + + /* if we are not an HNP, then don't select us */ + if (!orte_process_info.seed) { + return NULL; + } + if (NULL != getenv("SGE_ROOT") && NULL != getenv("ARC") && NULL != getenv("PE_HOSTFILE") && NULL != getenv("JOB_ID")) { opal_output(orte_pls_base.pls_output, "pls:gridengine: available for selection"); + + /* ensure the receive gets posted */ + if (ORTE_SUCCESS != (rc = orte_pls_base_comm_start())) { + ORTE_ERROR_LOG(rc); + } + *priority = mca_pls_gridengine_component.priority; return &orte_pls_gridengine_module; } diff --git a/orte/mca/pls/gridengine/pls_gridengine_module.c b/orte/mca/pls/gridengine/pls_gridengine_module.c index 24c775730e..fbe98bf900 100644 --- a/orte/mca/pls/gridengine/pls_gridengine_module.c +++ b/orte/mca/pls/gridengine/pls_gridengine_module.c @@ -31,6 +31,7 @@ */ #include "orte_config.h" +#include "orte/orte_constants.h" #include #ifdef 
HAVE_UNISTD_H @@ -70,30 +71,33 @@ #include "opal/util/opal_environ.h" #include "opal/util/output.h" #include "opal/util/basename.h" -#include "orte/orte_constants.h" + #include "orte/util/univ_info.h" #include "orte/util/session_dir.h" +#include "orte/util/sys_info.h" #include "orte/runtime/orte_wait.h" #include "orte/mca/ns/ns.h" -#include "orte/mca/pls/pls.h" -#include "orte/mca/pls/base/base.h" #include "orte/mca/rml/rml.h" #include "orte/mca/gpr/gpr.h" #include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/ras/base/ras_base_node.h" -#include "orte/mca/rmaps/base/rmaps_base_map.h" -#include "orte/mca/rmgr/base/base.h" +#include "orte/mca/ras/ras_types.h" #include "orte/mca/smr/smr.h" + +/* clean up for ORTE 2.0 */ +#include "orte/mca/rmaps/base/rmaps_private.h" + +#include "orte/mca/pls/pls.h" +#include "orte/mca/pls/base/pls_private.h" #include "orte/mca/pls/gridengine/pls_gridengine.h" -#include "orte/util/sys_info.h" #if !defined(__WINDOWS__) extern char **environ; #endif /* !defined(__WINDOWS__) */ -orte_pls_base_module_1_0_0_t orte_pls_gridengine_module = { - orte_pls_gridengine_launch, +orte_pls_base_module_t orte_pls_gridengine_module = { + orte_pls_gridengine_launch_job, orte_pls_gridengine_terminate_job, + orte_pls_gridengine_terminate_orteds, orte_pls_gridengine_terminate_proc, orte_pls_gridengine_signal_job, orte_pls_gridengine_signal_proc, @@ -106,8 +110,8 @@ orte_pls_base_module_1_0_0_t orte_pls_gridengine_module = { */ struct gridengine_daemon_info_t { opal_object_t super; - orte_ras_node_t* node; - orte_jobid_t jobid; + orte_process_name_t *name; + char *nodename; }; typedef struct gridengine_daemon_info_t gridengine_daemon_info_t; static OBJ_CLASS_INSTANCE(gridengine_daemon_info_t, @@ -143,71 +147,36 @@ static int orte_pls_gridengine_fill_orted_path(char** orted_path) static void orte_pls_gridengine_wait_daemon(pid_t pid, int status, void* cbdata) { gridengine_daemon_info_t *info = (gridengine_daemon_info_t*) cbdata; - opal_list_t map; - 
opal_list_item_t* item; int rc; - /* if qrsh exited abnormally, set the child processes to aborted + /* if qrsh exited abnormally, set the daemon's state to aborted and print something useful to the user. The usual reasons for qrsh to exit abnormally all are a pretty good indication that - the child processes aren't going to start up properly. + the child processes aren't going to start up properly, so this + will signal the system to kill the job. This should somehow be pushed up to the calling level, but we don't really have a way to do that just yet. */ if (! WIFEXITED(status) || ! WEXITSTATUS(status) == 0) { - /* get the mapping for our node so we can cancel the right things */ - OBJ_CONSTRUCT(&map, opal_list_t); - rc = orte_rmaps_base_get_node_map(orte_process_info.my_name->cellid, - info->jobid, - info->node->node_name, - &map); - if (ORTE_SUCCESS != rc) { - ORTE_ERROR_LOG(rc); - goto cleanup; - } - - /* set state of all processes associated with the daemon as - terminated */ - for(item = opal_list_get_first(&map); - item != opal_list_get_end(&map); - item = opal_list_get_next(item)) { - orte_rmaps_base_map_t* map = (orte_rmaps_base_map_t*) item; - orte_std_cntr_t i; - - for (i = 0 ; i < map->num_procs ; ++i) { - /* Clean up the session directory as if we were the - process itself. This covers the case where the - process died abnormally and didn't cleanup its own - session directory. */ - - orte_session_dir_finalize(&(map->procs[i])->proc_name); - - rc = orte_smr.set_proc_state(&(map->procs[i]->proc_name), - ORTE_PROC_STATE_ABORTED, status); - } - if (ORTE_SUCCESS != rc) { - ORTE_ERROR_LOG(rc); - } - } - OBJ_DESTRUCT(&map); - - cleanup: - /* tell the user something went wrong */ + /* tell the user something went wrong. We need to do this BEFORE we + * set the state to ABORTED as that action will cause a trigger to + * fire that will kill the job before any output would get printed! 
+ */ opal_output(0, "ERROR: A daemon on node %s failed to start as expected.", - info->node->node_name); + info->nodename); opal_output(0, "ERROR: There may be more information available from"); opal_output(0, "ERROR: the 'qstat -t' command on the Grid Engine tasks."); opal_output(0, "ERROR: If the problem persists, please restart the"); opal_output(0, "ERROR: Grid Engine PE job"); if (WIFEXITED(status)) { opal_output(0, "ERROR: The daemon exited unexpectedly with status %d.", - WEXITSTATUS(status)); + WEXITSTATUS(status)); } else if (WIFSIGNALED(status)) { #ifdef WCOREDUMP if (WCOREDUMP(status)) { opal_output(0, "The daemon received a signal %d (with core).", - WTERMSIG(status)); + WTERMSIG(status)); } else { opal_output(0, "The daemon received a signal %d.", WTERMSIG(status)); } @@ -217,10 +186,15 @@ static void orte_pls_gridengine_wait_daemon(pid_t pid, int status, void* cbdata) } else { opal_output(0, "No extra status information is available: %d.", status); } + + /* now set the state to aborted */ + rc = orte_smr.set_proc_state(info->name, ORTE_PROC_STATE_ABORTED, status); + if (ORTE_SUCCESS != rc) { + ORTE_ERROR_LOG(rc); + } } /* cleanup */ - OBJ_RELEASE(info->node); OBJ_RELEASE(info); } @@ -228,7 +202,7 @@ static void orte_pls_gridengine_wait_daemon(pid_t pid, int status, void* cbdata) * Launch a daemon (bootproxy) on each node. The daemon will be responsible * for launching the application. 
*/ -int orte_pls_gridengine_launch(orte_jobid_t jobid) +int orte_pls_gridengine_launch_job(orte_jobid_t jobid) { opal_list_t mapping; opal_list_item_t* m_item, *n_item; @@ -247,7 +221,14 @@ int orte_pls_gridengine_launch(orte_jobid_t jobid) sigset_t sigs; char *lib_base = NULL, *bin_base = NULL; char *sge_root, *sge_arch; - + opal_list_t daemons; + orte_pls_daemon_info_t *dmn; + + /* setup a list that will contain the info for all the daemons + * so we can store it on the registry when done + */ + OBJ_CONSTRUCT(&daemons, opal_list_t); + /* Query the list of nodes allocated and mapped to this job. * We need the entire mapping for a couple of reasons: * - need the prefix to start with. @@ -257,6 +238,7 @@ int orte_pls_gridengine_launch(orte_jobid_t jobid) OBJ_CONSTRUCT(&mapping, opal_list_t); rc = orte_rmaps_base_get_map(jobid, &mapping); if (ORTE_SUCCESS != rc) { + ORTE_ERROR_LOG(rc); goto cleanup; } @@ -272,13 +254,22 @@ int orte_pls_gridengine_launch(orte_jobid_t jobid) * Allocate a range of vpids for the daemons. 
*/ if (num_nodes == 0) { - return ORTE_ERR_BAD_PARAM; + rc = ORTE_ERR_BAD_PARAM; + ORTE_ERROR_LOG(rc); + goto cleanup; } rc = orte_ns.reserve_range(0, num_nodes, &vpid); if (ORTE_SUCCESS != rc) { + ORTE_ERROR_LOG(rc); goto cleanup; } + /* setup the orted triggers for passing their launch info */ + if (ORTE_SUCCESS != (rc = orte_smr.init_orted_stage_gates(jobid, num_nodes, NULL, NULL))) { + ORTE_ERROR_LOG(rc); + goto cleanup; + } + /* need integer value for command line parameter */ if (ORTE_SUCCESS != (rc = orte_ns.convert_jobid_to_string(&jobid_string, jobid))) { ORTE_ERROR_LOG(rc); @@ -309,7 +300,7 @@ int orte_pls_gridengine_launch(orte_jobid_t jobid) opal_argv_append(&argc, &argv, "--no-daemonize"); /* check for debug flags */ - orte_pls_base_proxy_mca_argv(&argc, &argv); + orte_pls_base_mca_argv(&argc, &argv); opal_argv_append(&argc, &argv, "--bootproxy"); opal_argv_append(&argc, &argv, jobid_string); @@ -451,6 +442,17 @@ int orte_pls_gridengine_launch(orte_jobid_t jobid) goto cleanup; } + /* new daemon - setup to record its info */ + dmn = OBJ_NEW(orte_pls_daemon_info_t); + dmn->active_job = jobid; + dmn->cell = ras_node->node_cellid; + dmn->nodename = strdup(ras_node->node_name); + if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&(dmn->name), name, ORTE_NAME))) { + ORTE_ERROR_LOG(rc); + goto cleanup; + } + opal_list_append(&daemons, &dmn->super); + #ifdef __WINDOWS__ printf("Unimplemented feature for windows\n"); return ORTE_ERR_NOT_IMPLEMENTED; @@ -659,19 +661,14 @@ int orte_pls_gridengine_launch(orte_jobid_t jobid) opal_output(0, "pls:gridengine: parent"); } - /* save the daemons name on the node */ - if (ORTE_SUCCESS != (rc = orte_pls_base_proxy_set_node_name(ras_node,jobid,name))) { - ORTE_ERROR_LOG(rc); - goto cleanup; - } - /* setup callback on sigchild - wait until setup above is complete * as the callback can occur in the call to orte_wait_cb */ daemon_info = OBJ_NEW(gridengine_daemon_info_t); - OBJ_RETAIN(ras_node); - daemon_info->node = ras_node; - 
daemon_info->jobid = jobid; + if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&(daemon_info->name), name, ORTE_NAME))) { + ORTE_ERROR_LOG(rc); + } + daemon_info->nodename= strdup(ras_node->node_name); orte_wait_cb(pid, orte_pls_gridengine_wait_daemon, daemon_info); vpid++; @@ -679,6 +676,12 @@ int orte_pls_gridengine_launch(orte_jobid_t jobid) free(name); } } + + /* all done, so store the daemon info on the registry */ + if (ORTE_SUCCESS != (rc = orte_pls_base_store_active_daemons(&daemons, jobid))) { + ORTE_ERROR_LOG(rc); + } + cleanup: while (NULL != (m_item = opal_list_remove_first(&mapping))) { @@ -686,6 +689,11 @@ int orte_pls_gridengine_launch(orte_jobid_t jobid) } OBJ_DESTRUCT(&mapping); + while (NULL != (m_item = opal_list_remove_first(&daemons))) { + OBJ_RELEASE(m_item); + } + OBJ_DESTRUCT(&daemons); + if (NULL != lib_base) { free(lib_base); } @@ -806,12 +814,63 @@ static int update_slot_keyval(orte_ras_node_t* ras_node, int* slot_cnt) */ int orte_pls_gridengine_terminate_job(orte_jobid_t jobid) { - return orte_pls_base_proxy_terminate_job(jobid); + int rc; + opal_list_t daemons; + opal_list_item_t *item; + + /* construct the list of active daemons on this job */ + OBJ_CONSTRUCT(&daemons, opal_list_t); + if (ORTE_SUCCESS != (rc = orte_pls_base_get_active_daemons(&daemons, jobid))) { + ORTE_ERROR_LOG(rc); + goto CLEANUP; + } + + /* order them to kill their local procs for this job */ + if (ORTE_SUCCESS != (rc = orte_pls_base_orted_kill_local_procs(&daemons, jobid))) { + ORTE_ERROR_LOG(rc); + goto CLEANUP; + } + +CLEANUP: + while (NULL != (item = opal_list_remove_first(&daemons))) { + OBJ_RELEASE(item); + } + OBJ_DESTRUCT(&daemons); + return rc; } int orte_pls_gridengine_terminate_proc(const orte_process_name_t* proc) { - return orte_pls_base_proxy_terminate_proc(proc); + return ORTE_ERR_NOT_IMPLEMENTED; +} + +/** + * Terminate the orteds for a given job + */ +int orte_pls_gridengine_terminate_orteds(orte_jobid_t jobid) +{ + int rc; + opal_list_t daemons; + 
opal_list_item_t *item; + + /* construct the list of active daemons on this job */ + OBJ_CONSTRUCT(&daemons, opal_list_t); + if (ORTE_SUCCESS != (rc = orte_pls_base_get_active_daemons(&daemons, jobid))) { + ORTE_ERROR_LOG(rc); + goto CLEANUP; + } + + /* now tell them to die! */ + if (ORTE_SUCCESS != (rc = orte_pls_base_orted_exit(&daemons))) { + ORTE_ERROR_LOG(rc); + } + +CLEANUP: + while (NULL != (item = opal_list_remove_first(&daemons))) { + OBJ_RELEASE(item); + } + OBJ_DESTRUCT(&daemons); + return rc; } /** @@ -819,7 +878,28 @@ int orte_pls_gridengine_terminate_proc(const orte_process_name_t* proc) */ int orte_pls_gridengine_signal_job(orte_jobid_t jobid, int32_t signal) { - return orte_pls_base_proxy_signal_job(jobid, signal); + int rc; + opal_list_t daemons; + opal_list_item_t *item; + + /* construct the list of active daemons on this job */ + OBJ_CONSTRUCT(&daemons, opal_list_t); + if (ORTE_SUCCESS != (rc = orte_pls_base_get_active_daemons(&daemons, jobid))) { + ORTE_ERROR_LOG(rc); + OBJ_DESTRUCT(&daemons); + return rc; + } + + /* order them to pass this signal to their local procs */ + if (ORTE_SUCCESS != (rc = orte_pls_base_orted_signal_local_procs(&daemons, signal))) { + ORTE_ERROR_LOG(rc); + } + + while (NULL != (item = opal_list_remove_first(&daemons))) { + OBJ_RELEASE(item); + } + OBJ_DESTRUCT(&daemons); + return rc; } /** @@ -827,7 +907,7 @@ int orte_pls_gridengine_signal_job(orte_jobid_t jobid, int32_t signal) */ int orte_pls_gridengine_signal_proc(const orte_process_name_t* proc, int32_t signal) { - return orte_pls_base_proxy_signal_proc(proc, signal); + return ORTE_ERR_NOT_IMPLEMENTED; } /** @@ -835,8 +915,12 @@ int orte_pls_gridengine_signal_proc(const orte_process_name_t* proc, int32_t sig */ int orte_pls_gridengine_finalize(void) { + int rc; + /* cleanup any pending recvs */ - orte_rml.recv_cancel(ORTE_RML_NAME_ANY, ORTE_RML_TAG_RMGR_CLNT); + if (ORTE_SUCCESS != (rc = orte_pls_base_comm_stop())) { + ORTE_ERROR_LOG(rc); + } return ORTE_SUCCESS; } 
diff --git a/orte/mca/pls/pls.h b/orte/mca/pls/pls.h index a1318025b2..cef06ccb7a 100644 --- a/orte/mca/pls/pls.h +++ b/orte/mca/pls/pls.h @@ -196,7 +196,7 @@ /** * Launch the indicated jobid */ -typedef int (*orte_pls_base_module_launch_fn_t)(orte_jobid_t); +typedef int (*orte_pls_base_module_launch_job_fn_t)(orte_jobid_t); /** * Terminate any processes launched for the respective jobid by @@ -204,6 +204,11 @@ typedef int (*orte_pls_base_module_launch_fn_t)(orte_jobid_t); */ typedef int (*orte_pls_base_module_terminate_job_fn_t)(orte_jobid_t); +/** + * Terminate the daemons associated with this jobid + */ +typedef int (*orte_pls_base_module_terminate_orteds_fn_t)(orte_jobid_t); + /** * Terminate a specific process. */ @@ -226,21 +231,22 @@ typedef int (*orte_pls_base_module_signal_proc_fn_t)(const orte_process_name_t*, typedef int (*orte_pls_base_module_finalize_fn_t)(void); /** - * pls module version 1.0.0 + * pls module version 1.3.0 */ -struct orte_pls_base_module_1_0_0_t { - orte_pls_base_module_launch_fn_t launch; - orte_pls_base_module_terminate_job_fn_t terminate_job; - orte_pls_base_module_terminate_proc_fn_t terminate_proc; - orte_pls_base_module_signal_job_fn_t signal_job; - orte_pls_base_module_signal_proc_fn_t signal_proc; - orte_pls_base_module_finalize_fn_t finalize; +struct orte_pls_base_module_1_3_0_t { + orte_pls_base_module_launch_job_fn_t launch_job; + orte_pls_base_module_terminate_job_fn_t terminate_job; + orte_pls_base_module_terminate_orteds_fn_t terminate_orteds; + orte_pls_base_module_terminate_proc_fn_t terminate_proc; + orte_pls_base_module_signal_job_fn_t signal_job; + orte_pls_base_module_signal_proc_fn_t signal_proc; + orte_pls_base_module_finalize_fn_t finalize; }; -/** shorten orte_pls_base_module_1_0_0_t declaration */ -typedef struct orte_pls_base_module_1_0_0_t orte_pls_base_module_1_0_0_t; +/** shorten orte_pls_base_module_1_3_0_t declaration */ +typedef struct orte_pls_base_module_1_3_0_t orte_pls_base_module_1_3_0_t; /** 
shorten orte_pls_base_module_t declaration */ -typedef struct orte_pls_base_module_1_0_0_t orte_pls_base_module_t; +typedef struct orte_pls_base_module_1_3_0_t orte_pls_base_module_t; /** * pls initialization function @@ -251,13 +257,13 @@ typedef struct orte_pls_base_module_1_0_0_t orte_pls_base_module_t; * @param priority (OUT) Relative priority or ranking use by MCA to * select a module. */ -typedef struct orte_pls_base_module_1_0_0_t* +typedef struct orte_pls_base_module_1_3_0_t* (*orte_pls_base_component_init_fn_t)(int *priority); /** - * pls component v1.0.0 + * pls component v1.3.0 */ -struct orte_pls_base_component_1_0_0_t { +struct orte_pls_base_component_1_3_0_t { /** component version */ mca_base_component_t pls_version; /** component data */ @@ -266,18 +272,23 @@ struct orte_pls_base_component_1_0_0_t { orte_pls_base_component_init_fn_t pls_init; }; /** Convenience typedef */ -typedef struct orte_pls_base_component_1_0_0_t orte_pls_base_component_1_0_0_t; +typedef struct orte_pls_base_component_1_3_0_t orte_pls_base_component_1_3_0_t; /** Convenience typedef */ -typedef orte_pls_base_component_1_0_0_t orte_pls_base_component_t; +typedef orte_pls_base_component_1_3_0_t orte_pls_base_component_t; /** * Macro for use in modules that are of type pls v1.0.0 */ -#define ORTE_PLS_BASE_VERSION_1_0_0 \ - /* pls v1.0 is chained to MCA v1.0 */ \ +#define ORTE_PLS_BASE_VERSION_1_3_0 \ + /* pls v1.3 is chained to MCA v1.0 */ \ MCA_BASE_VERSION_1_0_0, \ - /* pls v1.0 */ \ - "pls", 1, 0, 0 + /* pls v1.3 */ \ + "pls", 1, 3, 0 + +/* Global structure for accessing PLS functions +*/ +ORTE_DECLSPEC extern orte_pls_base_module_t orte_pls; /* holds selected module's function pointers */ + #endif /* MCA_PLS_H */ diff --git a/orte/mca/pls/pls_types.h b/orte/mca/pls/pls_types.h new file mode 100644 index 0000000000..52f8450891 --- /dev/null +++ b/orte/mca/pls/pls_types.h @@ -0,0 +1,36 @@ +/* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University 
Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +/** @file: + */ + +#ifndef ORTE_MCA_PLS_TYPES_H +#define ORTE_MCA_PLS_TYPES_H + +#include "orte_config.h" +#include "orte/orte_types.h" + +/* + * General PLS types + */ +#if defined(c_plusplus) || defined(__cplusplus) +extern "C" { +#endif + +#if defined(c_plusplus) || defined(__cplusplus) +} +#endif +#endif diff --git a/orte/mca/pls/poe/pls_poe_component.c b/orte/mca/pls/poe/pls_poe_component.c index aa026f7fce..1c88bb2456 100644 --- a/orte/mca/pls/poe/pls_poe_component.c +++ b/orte/mca/pls/poe/pls_poe_component.c @@ -31,6 +31,8 @@ #include "opal/util/argv.h" #include "opal/mca/base/mca_base_param.h" +#include "orte/util/proc_info.h" + /* * Public string showing the pls ompi_poe component version number @@ -55,10 +57,10 @@ orte_pls_poe_component_t mca_pls_poe_component = { about the component itself */ { - /* Indicate that we are a pls v1.0.0 component (which also + /* Indicate that we are a pls v1.3.0 component (which also implies a specific MCA version) */ - ORTE_PLS_BASE_VERSION_1_0_0, + ORTE_PLS_BASE_VERSION_1_3_0, /* Component name and version */ @@ -160,6 +162,12 @@ orte_pls_poe_component_init - initialize component, check if we can run on this */ orte_pls_base_module_t *orte_pls_poe_component_init(int *priority) { + + /* if we are NOT an HNP, then don't select us */ + if (!orte_process_info.seed) { + return NULL; + } + mca_pls_poe_component.path = opal_path_findv(mca_pls_poe_component.argv[0], 0, environ, NULL); if (NULL == mca_pls_poe_component.path) { return NULL; 
diff --git a/orte/mca/pls/poe/pls_poe_module.c b/orte/mca/pls/poe/pls_poe_module.c index 2d76fdf4ca..4d74046a46 100644 --- a/orte/mca/pls/poe/pls_poe_module.c +++ b/orte/mca/pls/poe/pls_poe_module.c @@ -23,31 +23,37 @@ */ #include "orte_config.h" +#include "orte/orte_constants.h" #include #include #ifdef HAVE_UNISTD_H #include #endif -#include "orte/orte_constants.h" + #include "opal/mca/base/mca_base_param.h" #include "opal/util/argv.h" #include "opal/util/opal_environ.h" + #include "orte/mca/errmgr/errmgr.h" #include "orte/mca/gpr/gpr.h" -#include "orte/mca/pls/pls.h" -#include "orte/mca/pls/poe/pls_poe.h" #include "orte/mca/ns/ns.h" -#include "orte/mca/rmaps/base/base.h" -#include "orte/mca/rmaps/base/rmaps_base_map.h" -#include "orte/mca/rmgr/base/base.h" +#include "orte/mca/rmgr/rmgr.h" #include "orte/mca/rml/rml.h" -#include "orte/mca/sds/base/base.h" #include "orte/mca/smr/smr.h" #include "orte/util/univ_info.h" #include "orte/util/session_dir.h" #include "orte/runtime/orte_wait.h" +/* remove for ORTE 2.0 */ +#include "orte/mca/rmaps/base/rmaps_private.h" +#include "orte/mca/sds/base/base.h" +#include "orte/mca/rmgr/base/rmgr_private.h" + + +#include "orte/mca/pls/pls.h" +#include "orte/mca/pls/poe/pls_poe.h" + #if !defined(__WINDOWS__) extern char **environ; #endif /* !defined(__WINDOWS__) */ @@ -55,16 +61,18 @@ extern char **environ; /* * Local functions */ -static int pls_poe_launch(orte_jobid_t jobid); +static int pls_poe_launch_job(orte_jobid_t jobid); static int pls_poe_terminate_job(orte_jobid_t jobid); +static int pls_poe_terminate_orteds(orte_jobid_t jobid); static int pls_poe_terminate_proc(const orte_process_name_t *name); static int pls_poe_signal_job(orte_jobid_t jobid, int32_t signal); static int pls_poe_signal_proc(const orte_process_name_t *name, int32_t signal); static int pls_poe_finalize(void); -orte_pls_base_module_1_0_0_t orte_pls_poe_module = { - pls_poe_launch, +orte_pls_base_module_t orte_pls_poe_module = { + pls_poe_launch_job, 
pls_poe_terminate_job, + pls_poe_terminate_orteds, pls_poe_terminate_proc, pls_poe_signal_job, pls_poe_signal_proc, @@ -451,7 +459,7 @@ poe_launch_interactive - launch an interactive job @param jobid JOB Identifier [IN] @return error number */ -static inline int poe_launch_interactive(orte_jobid_t jobid) +static inline int poe_launch_interactive_job(orte_jobid_t jobid) { opal_list_t map, nodes, mapping_list; opal_list_item_t* item; @@ -480,7 +488,7 @@ static inline int poe_launch_interactive(orte_jobid_t jobid) if(!strncmp(mca_pls_poe_component.resource_allocation,"hostfile",8)) { - /* Create a tempolary hostlist file if user specify */ + /* Create a temporary hostlist file if user specify */ if( (NULL==(mca_pls_poe_component.hostfile=tempnam(NULL,NULL))) || (NULL==(hfp=fopen(mca_pls_poe_component.hostfile,"w"))) ) { @@ -502,10 +510,10 @@ static inline int poe_launch_interactive(orte_jobid_t jobid) rc = orte_rmaps_base_get_map(jobid,&map); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); goto cleanup; } - rc = orte_rmaps_base_get_vpid_range(jobid, &vpid_start, &vpid_range); + rc = orte_rmgr.get_vpid_range(jobid, &vpid_start, &vpid_range); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); goto cleanup; } - /* Create a tempolary POE command file */ + /* Create a temporary POE command file */ for(item = opal_list_get_first(&map); item != opal_list_get_end(&map); @@ -602,10 +610,10 @@ pls_poe_launch - launch a POE job @param jobid JOB Identifier [IN] @return error number */ -static int pls_poe_launch(orte_jobid_t jobid) +static int pls_poe_launch_job(orte_jobid_t jobid) { if(0 == strncmp(mca_pls_poe_component.class,"interactive",11)) { - return poe_launch_interactive(jobid); + return poe_launch_interactive_job(jobid); } return ORTE_ERR_NOT_IMPLEMENTED; } @@ -621,6 +629,11 @@ static int pls_poe_terminate_proc(const orte_process_name_t *name) return ORTE_ERR_NOT_IMPLEMENTED; } +static int pls_poe_terminate_orteds(orte_jobid_t jobid) +{ + return ORTE_ERR_NOT_IMPLEMENTED; +} + 
static int pls_poe_signal_job(orte_jobid_t jobid, int32_t signal) { return ORTE_ERR_NOT_IMPLEMENTED; @@ -633,7 +646,7 @@ static int pls_poe_signal_proc(const orte_process_name_t *name, int32_t signal) } /** -pls_poe_finalize - clean up tempolary files +pls_poe_finalize - clean up temporary files @return error number */ static int pls_poe_finalize(void) diff --git a/orte/mca/pls/fork/Makefile.am b/orte/mca/pls/proxy/Makefile.am similarity index 72% rename from orte/mca/pls/fork/Makefile.am rename to orte/mca/pls/proxy/Makefile.am index 2ceea2cfef..e86e46294c 100644 --- a/orte/mca/pls/fork/Makefile.am +++ b/orte/mca/pls/proxy/Makefile.am @@ -19,33 +19,33 @@ # Use the top-level Makefile.options -dist_pkgdata_DATA = help-orte-pls-fork.txt - -sources = \ - pls_fork.h \ - pls_fork_component.c \ - pls_fork_module.c # Make the output library in this directory, and name it either # mca__.la (for DSO builds) or libmca__.la # (for static builds). -if OMPI_BUILD_pls_fork_DSO +if OMPI_BUILD_pls_proxy_DSO component_noinst = -component_install = mca_pls_fork.la +component_install = mca_pls_proxy.la else -component_noinst = libmca_pls_fork.la +component_noinst = libmca_pls_proxy.la component_install = endif +proxy_SOURCES = \ + pls_proxy.c \ + pls_proxy.h \ + pls_proxy_component.c + mcacomponentdir = $(libdir)/openmpi mcacomponent_LTLIBRARIES = $(component_install) -mca_pls_fork_la_SOURCES = $(sources) -mca_pls_fork_la_LDFLAGS = -module -avoid-version -mca_pls_fork_la_LIBADD = \ +mca_pls_proxy_la_SOURCES = $(proxy_SOURCES) +mca_pls_proxy_la_LIBADD = \ $(top_ompi_builddir)/orte/liborte.la \ $(top_ompi_builddir)/opal/libopal.la +mca_pls_proxy_la_LDFLAGS = -module -avoid-version noinst_LTLIBRARIES = $(component_noinst) -libmca_pls_fork_la_SOURCES =$(sources) -libmca_pls_fork_la_LDFLAGS = -module -avoid-version +libmca_pls_proxy_la_SOURCES = $(proxy_SOURCES) +libmca_pls_proxy_la_LIBADD = +libmca_pls_proxy_la_LDFLAGS = -module -avoid-version diff --git 
a/orte/mca/pls/bproc_orted/configure.params b/orte/mca/pls/proxy/configure.params similarity index 95% rename from orte/mca/pls/bproc_orted/configure.params rename to orte/mca/pls/proxy/configure.params index dca46c249e..9be71c7199 100644 --- a/orte/mca/pls/bproc_orted/configure.params +++ b/orte/mca/pls/proxy/configure.params @@ -19,5 +19,5 @@ # Specific to this module -PARAM_INIT_FILE=pls_bproc_orted.c +PARAM_INIT_FILE=pls_proxy.c PARAM_CONFIG_FILES="Makefile" diff --git a/orte/mca/pls/proxy/pls_proxy.c b/orte/mca/pls/proxy/pls_proxy.c new file mode 100644 index 0000000000..d8fcdb352b --- /dev/null +++ b/orte/mca/pls/proxy/pls_proxy.c @@ -0,0 +1,442 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +/** @file: + * + */ + +#include "orte_config.h" + +#include + +#include "orte/orte_constants.h" +#include "orte/orte_types.h" + +#include "orte/dss/dss.h" +#include "orte/mca/errmgr/errmgr.h" +#include "orte/mca/rml/rml.h" + +#include "orte/mca/pls/base/pls_private.h" +#include "pls_proxy.h" + +/** + * globals + */ + +/* + * functions + */ + +int orte_pls_proxy_launch(orte_jobid_t job) +{ + orte_buffer_t* cmd; + orte_buffer_t* answer; + orte_pls_cmd_flag_t command, ret_cmd; + orte_std_cntr_t count; + int rc; + + command = ORTE_PLS_LAUNCH_JOB_CMD; + + cmd = OBJ_NEW(orte_buffer_t); + if (cmd == NULL) { + ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); + return ORTE_ERR_OUT_OF_RESOURCE; + } + + if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &command, 1, ORTE_PLS_CMD))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(cmd); + return rc; + } + + if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &job, 1, ORTE_JOBID))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(cmd); + return rc; + } + + if (0 > orte_rml.send_buffer(orte_pls_proxy_replica, cmd, ORTE_RML_TAG_PLS, 0)) { + ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); + OBJ_RELEASE(cmd); + return ORTE_ERR_COMM_FAILURE; + } + OBJ_RELEASE(cmd); + + answer = OBJ_NEW(orte_buffer_t); + if(answer == NULL) { + ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); + return ORTE_ERR_OUT_OF_RESOURCE; + } + + if (0 > orte_rml.recv_buffer(orte_pls_proxy_replica, answer, ORTE_RML_TAG_PLS)) { + ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); + OBJ_RELEASE(answer); + return ORTE_ERR_COMM_FAILURE; + } + + count = 1; + if (ORTE_SUCCESS != (rc = orte_dss.unpack(answer, &ret_cmd, &count, ORTE_PLS_CMD))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(answer); + return rc; + } + + if (ret_cmd != command) { + ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); + OBJ_RELEASE(answer); + return ORTE_ERR_COMM_FAILURE; + } + + OBJ_RELEASE(answer); + return ORTE_SUCCESS; +} + +int orte_pls_proxy_terminate_job(orte_jobid_t job) +{ + orte_buffer_t* cmd; 
+ orte_buffer_t* answer; + orte_pls_cmd_flag_t command, ret_cmd; + orte_std_cntr_t count; + int rc; + + command = ORTE_PLS_TERMINATE_JOB_CMD; + + cmd = OBJ_NEW(orte_buffer_t); + if (cmd == NULL) { + ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); + return ORTE_ERR_OUT_OF_RESOURCE; + } + + if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &command, 1, ORTE_PLS_CMD))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(cmd); + return rc; + } + + if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &job, 1, ORTE_JOBID))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(cmd); + return rc; + } + + if (0 > orte_rml.send_buffer(orte_pls_proxy_replica, cmd, ORTE_RML_TAG_PLS, 0)) { + ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); + OBJ_RELEASE(cmd); + return ORTE_ERR_COMM_FAILURE; + } + OBJ_RELEASE(cmd); + + answer = OBJ_NEW(orte_buffer_t); + if(answer == NULL) { + ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); + return ORTE_ERR_OUT_OF_RESOURCE; + } + + if (0 > orte_rml.recv_buffer(orte_pls_proxy_replica, answer, ORTE_RML_TAG_PLS)) { + ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); + OBJ_RELEASE(answer); + return ORTE_ERR_COMM_FAILURE; + } + + count = 1; + if (ORTE_SUCCESS != (rc = orte_dss.unpack(answer, &ret_cmd, &count, ORTE_PLS_CMD))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(answer); + return rc; + } + + if (ret_cmd != command) { + ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); + OBJ_RELEASE(answer); + return ORTE_ERR_COMM_FAILURE; + } + + OBJ_RELEASE(answer); + return ORTE_SUCCESS; +} + +int orte_pls_proxy_terminate_orteds(orte_jobid_t job) +{ + orte_buffer_t* cmd; + orte_buffer_t* answer; + orte_pls_cmd_flag_t command, ret_cmd; + orte_std_cntr_t count; + int rc; + + command = ORTE_PLS_TERMINATE_ORTEDS_CMD; + + cmd = OBJ_NEW(orte_buffer_t); + if (cmd == NULL) { + ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); + return ORTE_ERR_OUT_OF_RESOURCE; + } + + if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &command, 1, ORTE_PLS_CMD))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(cmd); + return rc; + } + + if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &job, 1, 
ORTE_JOBID))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(cmd); + return rc; + } + + if (0 > orte_rml.send_buffer(orte_pls_proxy_replica, cmd, ORTE_RML_TAG_PLS, 0)) { + ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); + OBJ_RELEASE(cmd); + return ORTE_ERR_COMM_FAILURE; + } + OBJ_RELEASE(cmd); + + answer = OBJ_NEW(orte_buffer_t); + if(answer == NULL) { + ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); + return ORTE_ERR_OUT_OF_RESOURCE; + } + + if (0 > orte_rml.recv_buffer(orte_pls_proxy_replica, answer, ORTE_RML_TAG_PLS)) { + ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); + OBJ_RELEASE(answer); + return ORTE_ERR_COMM_FAILURE; + } + + count = 1; + if (ORTE_SUCCESS != (rc = orte_dss.unpack(answer, &ret_cmd, &count, ORTE_PLS_CMD))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(answer); + return rc; + } + + if (ret_cmd != command) { + ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); + OBJ_RELEASE(answer); + return ORTE_ERR_COMM_FAILURE; + } + + OBJ_RELEASE(answer); + return ORTE_SUCCESS; +} + +int orte_pls_proxy_signal_job(orte_jobid_t job, int32_t signal) +{ + orte_buffer_t* cmd; + orte_buffer_t* answer; + orte_pls_cmd_flag_t command, ret_cmd; + orte_std_cntr_t count; + int rc; + + command = ORTE_PLS_SIGNAL_JOB_CMD; + + cmd = OBJ_NEW(orte_buffer_t); + if (cmd == NULL) { + ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); + return ORTE_ERR_OUT_OF_RESOURCE; + } + + if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &command, 1, ORTE_PLS_CMD))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(cmd); + return rc; + } + + if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &job, 1, ORTE_JOBID))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(cmd); + return rc; + } + + if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &signal, 1, ORTE_INT32))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(cmd); + return rc; + } + + if (0 > orte_rml.send_buffer(orte_pls_proxy_replica, cmd, ORTE_RML_TAG_PLS, 0)) { + ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); + OBJ_RELEASE(cmd); + return ORTE_ERR_COMM_FAILURE; + } + OBJ_RELEASE(cmd); + + answer = OBJ_NEW(orte_buffer_t); + if(answer == NULL) { + 
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); + return ORTE_ERR_OUT_OF_RESOURCE; + } + + if (0 > orte_rml.recv_buffer(orte_pls_proxy_replica, answer, ORTE_RML_TAG_PLS)) { + ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); + OBJ_RELEASE(answer); + return ORTE_ERR_COMM_FAILURE; + } + + count = 1; + if (ORTE_SUCCESS != (rc = orte_dss.unpack(answer, &ret_cmd, &count, ORTE_PLS_CMD))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(answer); + return rc; + } + + if (ret_cmd != command) { + ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); + OBJ_RELEASE(answer); + return ORTE_ERR_COMM_FAILURE; + } + + OBJ_RELEASE(answer); + return ORTE_SUCCESS; +} + + +int orte_pls_proxy_terminate_proc(const orte_process_name_t* name) +{ + orte_buffer_t* cmd; + orte_buffer_t* answer; + orte_pls_cmd_flag_t command, ret_cmd; + orte_std_cntr_t count; + int rc; + + command = ORTE_PLS_TERMINATE_PROC_CMD; + + cmd = OBJ_NEW(orte_buffer_t); + if (cmd == NULL) { + ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); + return ORTE_ERR_OUT_OF_RESOURCE; + } + + if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &command, 1, ORTE_PLS_CMD))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(cmd); + return rc; + } + + if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &name, 1, ORTE_NAME))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(cmd); + return rc; + } + + if (0 > orte_rml.send_buffer(orte_pls_proxy_replica, cmd, ORTE_RML_TAG_PLS, 0)) { + ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); + OBJ_RELEASE(cmd); + return ORTE_ERR_COMM_FAILURE; + } + OBJ_RELEASE(cmd); + + answer = OBJ_NEW(orte_buffer_t); + if(answer == NULL) { + ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); + return ORTE_ERR_OUT_OF_RESOURCE; + } + + if (0 > orte_rml.recv_buffer(orte_pls_proxy_replica, answer, ORTE_RML_TAG_PLS)) { + ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); + OBJ_RELEASE(answer); + return ORTE_ERR_COMM_FAILURE; + } + + count = 1; + if (ORTE_SUCCESS != (rc = orte_dss.unpack(answer, &ret_cmd, &count, ORTE_PLS_CMD))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(answer); + return rc; + } + + if (ret_cmd != command) { + 
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); + OBJ_RELEASE(answer); + return ORTE_ERR_COMM_FAILURE; + } + + OBJ_RELEASE(answer); + return ORTE_SUCCESS; +} + +int orte_pls_proxy_signal_proc(const orte_process_name_t* name, int32_t signal) +{ + orte_buffer_t* cmd; + orte_buffer_t* answer; + orte_pls_cmd_flag_t command, ret_cmd; + orte_std_cntr_t count; + int rc; + + command = ORTE_PLS_SIGNAL_PROC_CMD; /* tag the request as signal-proc (not terminate-proc); the reply must echo this same command */ + + cmd = OBJ_NEW(orte_buffer_t); + if (cmd == NULL) { + ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); + return ORTE_ERR_OUT_OF_RESOURCE; + } + + if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &command, 1, ORTE_PLS_CMD))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(cmd); + return rc; + } + + if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &name, 1, ORTE_NAME))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(cmd); + return rc; + } + + if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &signal, 1, ORTE_INT32))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(cmd); + return rc; + } + + if (0 > orte_rml.send_buffer(orte_pls_proxy_replica, cmd, ORTE_RML_TAG_PLS, 0)) { + ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); + OBJ_RELEASE(cmd); + return ORTE_ERR_COMM_FAILURE; + } + OBJ_RELEASE(cmd); + + answer = OBJ_NEW(orte_buffer_t); + if(answer == NULL) { + ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); + return ORTE_ERR_OUT_OF_RESOURCE; + } + + if (0 > orte_rml.recv_buffer(orte_pls_proxy_replica, answer, ORTE_RML_TAG_PLS)) { + ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); + OBJ_RELEASE(answer); + return ORTE_ERR_COMM_FAILURE; + } + + count = 1; + if (ORTE_SUCCESS != (rc = orte_dss.unpack(answer, &ret_cmd, &count, ORTE_PLS_CMD))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(answer); + return rc; + } + + if (ret_cmd != command) { + ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); + OBJ_RELEASE(answer); + return ORTE_ERR_COMM_FAILURE; + } + + OBJ_RELEASE(answer); + return ORTE_SUCCESS; +} + + + diff --git a/orte/mca/pls/proxy/pls_proxy.h b/orte/mca/pls/proxy/pls_proxy.h new file mode 100644 index 0000000000..df47d732fb --- /dev/null +++ 
b/orte/mca/pls/proxy/pls_proxy.h @@ -0,0 +1,62 @@ +/* -*- C -*- + * + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + */ +#ifndef ORTE_PLS_PROXY_H +#define ORTE_PLS_PROXY_H + +#include "orte_config.h" + +#include "orte/mca/pls/pls.h" + +#if defined(c_plusplus) || defined(__cplusplus) +extern "C" { +#endif + +/* my replica */ +extern orte_process_name_t *orte_pls_proxy_replica; + +/* + * Module open / close + */ +int orte_pls_proxy_open(void); +int orte_pls_proxy_close(void); + + +/* + * Startup / Shutdown + */ +orte_pls_base_module_t* orte_pls_proxy_init(int *priority); +int orte_pls_proxy_finalize(void); + +/* + * proxy function prototypes + */ +int orte_pls_proxy_launch(orte_jobid_t job); +int orte_pls_proxy_terminate_job(orte_jobid_t job); +int orte_pls_proxy_terminate_orteds(orte_jobid_t job); +int orte_pls_proxy_terminate_proc(const orte_process_name_t* name); +int orte_pls_proxy_signal_job(orte_jobid_t job, int32_t signal); +int orte_pls_proxy_signal_proc(const orte_process_name_t* name, int32_t signal); + + +#if defined(c_plusplus) || defined(__cplusplus) +} +#endif + +#endif diff --git a/orte/mca/pls/proxy/pls_proxy_component.c b/orte/mca/pls/proxy/pls_proxy_component.c new file mode 100644 index 0000000000..583423432a --- /dev/null +++ b/orte/mca/pls/proxy/pls_proxy_component.c @@ -0,0 +1,123 @@ +/* -*- C -*- + * + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology 
+ * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +/** @file: + * + */ + +/* + * includes + */ +#include "orte_config.h" + +#include "orte/orte_constants.h" +#include "orte/util/proc_info.h" +#include "opal/util/output.h" + +#include "opal/mca/mca.h" +#include "opal/mca/base/mca_base_param.h" + +#include "orte/mca/pls/base/pls_private.h" +#include "pls_proxy.h" + +/* + * Struct of function pointers that need to be initialized + */ +orte_pls_base_component_t mca_pls_proxy_component = { + { + ORTE_PLS_BASE_VERSION_1_3_0, + + "proxy", /* MCA module name */ + ORTE_MAJOR_VERSION, /* MCA module major version */ + ORTE_MINOR_VERSION, /* MCA module minor version */ + ORTE_RELEASE_VERSION, /* MCA module release version */ + orte_pls_proxy_open, /* module open */ + orte_pls_proxy_close /* module close */ + }, + { + false /* checkpoint / restart */ + }, + orte_pls_proxy_init /* module init */ +}; + +/* + * setup the function pointers for the module + */ +static orte_pls_base_module_t orte_pls_proxy_module = { + orte_pls_proxy_launch, + orte_pls_proxy_terminate_job, + orte_pls_proxy_terminate_orteds, + orte_pls_proxy_terminate_proc, + orte_pls_proxy_signal_job, + orte_pls_proxy_signal_proc, + orte_pls_proxy_finalize +}; + +/* + * Whether or not we allowed this component to be selected + */ +static bool initialized = false; + +/* the name of our replica */ +orte_process_name_t *orte_pls_proxy_replica; + +/* + * Not much to do here. 
+ */ +int orte_pls_proxy_open(void) +{ + return ORTE_SUCCESS; +} + +/* + * ditto for this one + */ +int orte_pls_proxy_close(void) +{ + return ORTE_SUCCESS; +} + +orte_pls_base_module_t* orte_pls_proxy_init(int *priority) +{ + /* If we are an HNP, then don't pick us */ + if (orte_process_info.seed) { + return NULL; + } + + /* define the replica for us to use - for now, just point + * to the name service replica + */ + orte_pls_proxy_replica = orte_process_info.ns_replica; + + initialized = true; + *priority = 1; + return &orte_pls_proxy_module; +} + + +/* + * finalize routine + */ +int orte_pls_proxy_finalize(void) +{ + initialized = false; + + /* All done */ + + return ORTE_SUCCESS; +} diff --git a/orte/mca/pls/rsh/pls_rsh.h b/orte/mca/pls/rsh/pls_rsh.h index e52dcf24fd..d05125ff78 100644 --- a/orte/mca/pls/rsh/pls_rsh.h +++ b/orte/mca/pls/rsh/pls_rsh.h @@ -50,6 +50,7 @@ int orte_pls_rsh_finalize(void); */ int orte_pls_rsh_launch(orte_jobid_t); int orte_pls_rsh_terminate_job(orte_jobid_t); +int orte_pls_rsh_terminate_orteds(orte_jobid_t); int orte_pls_rsh_terminate_proc(const orte_process_name_t* proc_name); int orte_pls_rsh_signal_job(orte_jobid_t, int32_t); int orte_pls_rsh_signal_proc(const orte_process_name_t* proc_name, int32_t); diff --git a/orte/mca/pls/rsh/pls_rsh_component.c b/orte/mca/pls/rsh/pls_rsh_component.c index d86a7e7a80..1ce2969bbf 100644 --- a/orte/mca/pls/rsh/pls_rsh_component.c +++ b/orte/mca/pls/rsh/pls_rsh_component.c @@ -23,6 +23,7 @@ */ #include "orte_config.h" +#include "orte/orte_constants.h" #include #ifdef HAVE_UNISTD_H @@ -30,16 +31,19 @@ #endif #include -#include "orte/orte_constants.h" #include "opal/util/argv.h" #include "opal/util/path.h" #include "opal/util/basename.h" #include "opal/util/show_help.h" -#include "orte/mca/pls/pls.h" -#include "orte/mca/pls/rsh/pls_rsh.h" #include "opal/mca/base/mca_base_param.h" + +#include "orte/mca/errmgr/errmgr.h" #include "orte/mca/rml/rml.h" +#include "orte/mca/pls/pls.h" +#include 
"orte/mca/pls/base/pls_private.h" +#include "orte/mca/pls/rsh/pls_rsh.h" + #if !defined(__WINDOWS__) extern char **environ; #endif /* !defined(__WINDOWS__) */ @@ -68,10 +72,10 @@ orte_pls_rsh_component_t mca_pls_rsh_component = { about the component itself */ { - /* Indicate that we are a pls v1.0.0 component (which also + /* Indicate that we are a pls v1.3.0 component (which also implies a specific MCA version) */ - ORTE_PLS_BASE_VERSION_1_0_0, + ORTE_PLS_BASE_VERSION_1_3_0, /* Component name and version */ @@ -175,7 +179,13 @@ orte_pls_base_module_t *orte_pls_rsh_component_init(int *priority) { char *bname; size_t i; + int rc; + /* if we are not an HNP, then don't select us */ + if (!orte_process_info.seed) { + return NULL; + } + /* Take the string that was given to us by the pla_rsh_agent MCA param and search for it */ mca_pls_rsh_component.agent_argv = @@ -219,6 +229,12 @@ orte_pls_base_module_t *orte_pls_rsh_component_init(int *priority) return NULL; } *priority = mca_pls_rsh_component.priority; + + /* ensure the receive gets posted */ + if (ORTE_SUCCESS != (rc = orte_pls_base_comm_start())) { + ORTE_ERROR_LOG(rc); + } + return &orte_pls_rsh_module; } diff --git a/orte/mca/pls/rsh/pls_rsh_module.c b/orte/mca/pls/rsh/pls_rsh_module.c index 99a4437f90..1cc0dfb0ed 100644 --- a/orte/mca/pls/rsh/pls_rsh_module.c +++ b/orte/mca/pls/rsh/pls_rsh_module.c @@ -24,6 +24,7 @@ */ #include "orte_config.h" +#include "orte/orte_constants.h" #include #ifdef HAVE_UNISTD_H @@ -66,22 +67,24 @@ #include "opal/util/opal_environ.h" #include "opal/util/output.h" #include "opal/util/basename.h" -#include "orte/orte_constants.h" + +#include "orte/util/sys_info.h" #include "orte/util/univ_info.h" #include "orte/util/session_dir.h" + #include "orte/runtime/orte_wait.h" + #include "orte/mca/ns/ns.h" -#include "orte/mca/pls/pls.h" -#include "orte/mca/pls/base/base.h" #include "orte/mca/rml/rml.h" #include "orte/mca/gpr/gpr.h" #include "orte/mca/errmgr/errmgr.h" -#include 
"orte/mca/ras/base/ras_base_node.h" -#include "orte/mca/rmaps/base/rmaps_base_map.h" -#include "orte/mca/rmgr/base/base.h" +#include "orte/mca/ras/ras_types.h" +#include "orte/mca/rmaps/base/rmaps_private.h" #include "orte/mca/smr/smr.h" + +#include "orte/mca/pls/pls.h" +#include "orte/mca/pls/base/pls_private.h" #include "orte/mca/pls/rsh/pls_rsh.h" -#include "orte/util/sys_info.h" #if !defined(__WINDOWS__) extern char **environ; @@ -92,13 +95,14 @@ static int orte_pls_rsh_launch_threaded(orte_jobid_t jobid); #endif -orte_pls_base_module_1_0_0_t orte_pls_rsh_module = { +orte_pls_base_module_t orte_pls_rsh_module = { #if OMPI_HAVE_POSIX_THREADS && OMPI_THREADS_HAVE_DIFFERENT_PIDS && OMPI_ENABLE_PROGRESS_THREADS orte_pls_rsh_launch_threaded, #else orte_pls_rsh_launch, #endif orte_pls_rsh_terminate_job, + orte_pls_rsh_terminate_orteds, orte_pls_rsh_terminate_proc, orte_pls_rsh_signal_job, orte_pls_rsh_signal_proc, @@ -384,7 +388,7 @@ int orte_pls_rsh_launch(orte_jobid_t jobid) { opal_list_t mapping; opal_list_item_t* m_item, *n_item; - size_t num_nodes; + orte_std_cntr_t num_nodes; orte_vpid_t vpid; int node_name_index1; int node_name_index2; @@ -401,6 +405,13 @@ int orte_pls_rsh_launch(orte_jobid_t jobid) bool remote_bash = false, remote_csh = false; bool local_bash = false, local_csh = false; char *lib_base = NULL, *bin_base = NULL; + opal_list_t daemons; + orte_pls_daemon_info_t *dmn; + + /* setup a list that will contain the info for all the daemons + * so we can store it on the registry when done + */ + OBJ_CONSTRUCT(&daemons, opal_list_t); /* Query the list of nodes allocated and mapped to this job. 
* We need the entire mapping for a couple of reasons: @@ -433,6 +444,12 @@ int orte_pls_rsh_launch(orte_jobid_t jobid) goto cleanup; } + /* setup the orted triggers for passing their launch info */ + if (ORTE_SUCCESS != (rc = orte_smr.init_orted_stage_gates(jobid, num_nodes, NULL, NULL))) { + ORTE_ERROR_LOG(rc); + goto cleanup; + } + /* need integer value for command line parameter */ if (ORTE_SUCCESS != (rc = orte_ns.convert_jobid_to_string(&jobid_string, jobid))) { ORTE_ERROR_LOG(rc); @@ -519,7 +536,7 @@ int orte_pls_rsh_launch(orte_jobid_t jobid) opal_argv_append(&argc, &argv, mca_pls_rsh_component.orted); /* check for debug flags */ - orte_pls_base_proxy_mca_argv(&argc, &argv); + orte_pls_base_mca_argv(&argc, &argv); opal_argv_append(&argc, &argv, "--bootproxy"); opal_argv_append(&argc, &argv, jobid_string); @@ -647,6 +664,10 @@ int orte_pls_rsh_launch(orte_jobid_t jobid) if(ras_node->node_launched++ != 0) continue; + /* new daemon - setup to record its info */ + dmn = OBJ_NEW(orte_pls_daemon_info_t); + opal_list_append(&daemons, &dmn->super); + /* setup node name */ free(argv[node_name_index1]); if (NULL != ras_node->node_username && @@ -659,6 +680,9 @@ int orte_pls_rsh_launch(orte_jobid_t jobid) free(argv[node_name_index2]); argv[node_name_index2] = strdup(ras_node->node_name); + + /* save it in the daemon info */ + dmn->nodename = strdup(ras_node->node_name); /* initialize daemons process name */ rc = orte_ns.create_process_name(&name, ras_node->node_cellid, 0, vpid); @@ -666,9 +690,23 @@ int orte_pls_rsh_launch(orte_jobid_t jobid) ORTE_ERROR_LOG(rc); goto cleanup; } + + /* save it in the daemon info */ + dmn->cell = ras_node->node_cellid; + if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&(dmn->name), name, ORTE_NAME))) { + ORTE_ERROR_LOG(rc); + goto cleanup; + } /* rsh a child to exec the rsh/ssh session */ - #ifdef __WINDOWS__ + + /* set the process state to "launched" */ + if (ORTE_SUCCESS != (rc = orte_smr.set_proc_state(name, ORTE_PROC_STATE_LAUNCHED, 
0))) { + ORTE_ERROR_LOG(rc); + goto cleanup; + } + +#ifdef __WINDOWS__ printf("Unimplemented feature for windows\n"); return; #if 0 @@ -977,13 +1015,7 @@ int orte_pls_rsh_launch(orte_jobid_t jobid) opal_condition_wait(&mca_pls_rsh_component.cond, &mca_pls_rsh_component.lock); } OPAL_THREAD_UNLOCK(&mca_pls_rsh_component.lock); - - /* save the daemons name on the node */ - if (ORTE_SUCCESS != (rc = orte_pls_base_proxy_set_node_name(ras_node,jobid,name))) { - ORTE_ERROR_LOG(rc); - goto cleanup; - } - + /* setup callback on sigchild - wait until setup above is complete * as the callback can occur in the call to orte_wait_cb */ @@ -1002,6 +1034,11 @@ int orte_pls_rsh_launch(orte_jobid_t jobid) free(name); } } + + /* all done, so store the daemon info on the registry */ + if (ORTE_SUCCESS != (rc = orte_pls_base_store_active_daemons(&daemons, jobid))) { + ORTE_ERROR_LOG(rc); + } cleanup: while (NULL != (m_item = opal_list_remove_first(&mapping))) { @@ -1009,6 +1046,11 @@ cleanup: } OBJ_DESTRUCT(&mapping); + while (NULL != (m_item = opal_list_remove_first(&daemons))) { + OBJ_RELEASE(m_item); + } + OBJ_DESTRUCT(&daemons); + if (NULL != lib_base) { free(lib_base); } @@ -1024,41 +1066,112 @@ cleanup: /** - * Query the registry for all nodes participating in the job + * Terminate all processes for a given job */ int orte_pls_rsh_terminate_job(orte_jobid_t jobid) { - return orte_pls_base_proxy_terminate_job(jobid); + int rc; + opal_list_t daemons; + opal_list_item_t *item; + + /* construct the list of active daemons on this job */ + OBJ_CONSTRUCT(&daemons, opal_list_t); + if (ORTE_SUCCESS != (rc = orte_pls_base_get_active_daemons(&daemons, jobid))) { + ORTE_ERROR_LOG(rc); + goto CLEANUP; + } + + /* order them to kill their local procs for this job */ + if (ORTE_SUCCESS != (rc = orte_pls_base_orted_kill_local_procs(&daemons, jobid))) { + ORTE_ERROR_LOG(rc); + goto CLEANUP; + } + +CLEANUP: + while (NULL != (item = opal_list_remove_first(&daemons))) { + OBJ_RELEASE(item); + } + 
OBJ_DESTRUCT(&daemons); + return rc; } +/** +* Terminate the orteds for a given job + */ +int orte_pls_rsh_terminate_orteds(orte_jobid_t jobid) +{ + int rc; + opal_list_t daemons; + opal_list_item_t *item; + + /* construct the list of active daemons on this job */ + OBJ_CONSTRUCT(&daemons, opal_list_t); + if (ORTE_SUCCESS != (rc = orte_pls_base_get_active_daemons(&daemons, jobid))) { + ORTE_ERROR_LOG(rc); + goto CLEANUP; + } + + /* now tell them to die! */ + if (ORTE_SUCCESS != (rc = orte_pls_base_orted_exit(&daemons))) { + ORTE_ERROR_LOG(rc); + } + +CLEANUP: + while (NULL != (item = opal_list_remove_first(&daemons))) { + OBJ_RELEASE(item); + } + OBJ_DESTRUCT(&daemons); + return rc; +} + +/* + * Terminate a specific process + */ int orte_pls_rsh_terminate_proc(const orte_process_name_t* proc) { - return orte_pls_base_proxy_terminate_proc(proc); + return ORTE_ERR_NOT_IMPLEMENTED; } int orte_pls_rsh_signal_job(orte_jobid_t jobid, int32_t signal) { - return orte_pls_base_proxy_signal_job(jobid, signal); + int rc; + opal_list_t daemons; + opal_list_item_t *item; + + /* construct the list of active daemons on this job */ + OBJ_CONSTRUCT(&daemons, opal_list_t); + if (ORTE_SUCCESS != (rc = orte_pls_base_get_active_daemons(&daemons, jobid))) { + ORTE_ERROR_LOG(rc); + OBJ_DESTRUCT(&daemons); + return rc; + } + + /* order them to pass this signal to their local procs */ + if (ORTE_SUCCESS != (rc = orte_pls_base_orted_signal_local_procs(&daemons, signal))) { + ORTE_ERROR_LOG(rc); + } + + while (NULL != (item = opal_list_remove_first(&daemons))) { + OBJ_RELEASE(item); + } + OBJ_DESTRUCT(&daemons); + return rc; } int orte_pls_rsh_signal_proc(const orte_process_name_t* proc, int32_t signal) { - return orte_pls_base_proxy_signal_proc(proc, signal); + return ORTE_ERR_NOT_IMPLEMENTED; } int orte_pls_rsh_finalize(void) { - if (mca_pls_rsh_component.reap) { - OPAL_THREAD_LOCK(&mca_pls_rsh_component.lock); - while (mca_pls_rsh_component.num_children > 0) { - 
opal_condition_wait(&mca_pls_rsh_component.cond, &mca_pls_rsh_component.lock); - } - OPAL_THREAD_UNLOCK(&mca_pls_rsh_component.lock); - } - + int rc; + /* cleanup any pending recvs */ - orte_rml.recv_cancel(ORTE_RML_NAME_ANY, ORTE_RML_TAG_RMGR_CLNT); - return ORTE_SUCCESS; + if (ORTE_SUCCESS != (rc = orte_pls_base_comm_stop())) { + ORTE_ERROR_LOG(rc); + } + return rc; } diff --git a/orte/mca/pls/slurm/pls_slurm.h b/orte/mca/pls/slurm/pls_slurm.h index e31ccb8ad2..8787caeabb 100644 --- a/orte/mca/pls/slurm/pls_slurm.h +++ b/orte/mca/pls/slurm/pls_slurm.h @@ -43,7 +43,7 @@ extern "C" { ORTE_DECLSPEC extern orte_pls_slurm_component_t mca_pls_slurm_component; - ORTE_DECLSPEC extern orte_pls_base_module_1_0_0_t + ORTE_DECLSPEC extern orte_pls_base_module_t orte_pls_slurm_module; #if defined(c_plusplus) || defined(__cplusplus) diff --git a/orte/mca/pls/slurm/pls_slurm_component.c b/orte/mca/pls/slurm/pls_slurm_component.c index 7758525641..bfb1248752 100644 --- a/orte/mca/pls/slurm/pls_slurm_component.c +++ b/orte/mca/pls/slurm/pls_slurm_component.c @@ -27,8 +27,13 @@ #include "opal/util/output.h" #include "opal/mca/base/mca_base_param.h" #include "orte/orte_constants.h" + +#include "orte/util/proc_info.h" +#include "orte/mca/errmgr/errmgr.h" + #include "orte/mca/pls/pls.h" #include "orte/mca/pls/base/base.h" +#include "orte/mca/pls/base/pls_private.h" #include "pls_slurm.h" @@ -44,7 +49,7 @@ const char *mca_pls_slurm_component_version_string = */ static int pls_slurm_open(void); static int pls_slurm_close(void); -static struct orte_pls_base_module_1_0_0_t *pls_slurm_init(int *priority); +static orte_pls_base_module_t *pls_slurm_init(int *priority); /* @@ -59,10 +64,10 @@ orte_pls_slurm_component_t mca_pls_slurm_component = { information about the component itself */ { - /* Indicate that we are a pls v1.0.0 component (which also + /* Indicate that we are a pls v1.3.0 component (which also implies a specific MCA version) */ - ORTE_PLS_BASE_VERSION_1_0_0, + 
ORTE_PLS_BASE_VERSION_1_3_0, /* Component name and version */ @@ -121,14 +126,28 @@ static int pls_slurm_open(void) } -static struct orte_pls_base_module_1_0_0_t *pls_slurm_init(int *priority) +static orte_pls_base_module_t *pls_slurm_init(int *priority) { + int rc; + + /* if we are NOT an HNP, then don't select us */ + if (!orte_process_info.seed) { + return NULL; + } + /* Are we running under a SLURM job? */ if (NULL != getenv("SLURM_JOBID")) { *priority = mca_pls_slurm_component.priority; - opal_output(orte_pls_base.pls_output, - "pls:slurm: available for selection"); + if (mca_pls_slurm_component.debug) { + opal_output(0, "pls:slurm: available for selection"); + } + + /* ensure the receive gets posted */ + if (ORTE_SUCCESS != (rc = orte_pls_base_comm_start())) { + ORTE_ERROR_LOG(rc); + } + return &orte_pls_slurm_module; } diff --git a/orte/mca/pls/slurm/pls_slurm_module.c b/orte/mca/pls/slurm/pls_slurm_module.c index 8a1f34c7a8..0b12fae168 100644 --- a/orte/mca/pls/slurm/pls_slurm_module.c +++ b/orte/mca/pls/slurm/pls_slurm_module.c @@ -24,6 +24,8 @@ */ #include "orte_config.h" +#include "orte/orte_constants.h" +#include "orte/orte_types.h" #include #ifdef HAVE_UNISTD_H @@ -51,24 +53,25 @@ #include "opal/util/show_help.h" #include "opal/util/basename.h" #include "opal/mca/base/mca_base_param.h" + #include "orte/runtime/runtime.h" -#include "orte/orte_constants.h" -#include "orte/orte_types.h" -#include "orte/orte_constants.h" -#include "orte/mca/pls/pls.h" -#include "orte/mca/pls/base/base.h" #include "orte/mca/ns/base/base.h" #include "orte/mca/rml/rml.h" #include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/rmaps/base/rmaps_base_map.h" +#include "orte/mca/smr/smr.h" +#include "orte/mca/rmaps/base/rmaps_private.h" + +#include "orte/mca/pls/pls.h" +#include "orte/mca/pls/base/pls_private.h" #include "pls_slurm.h" /* * Local functions */ -static int pls_slurm_launch(orte_jobid_t jobid); +static int pls_slurm_launch_job(orte_jobid_t jobid); static int 
pls_slurm_terminate_job(orte_jobid_t jobid); +static int pls_slurm_terminate_orteds(orte_jobid_t jobid); static int pls_slurm_terminate_proc(const orte_process_name_t *name); static int pls_slurm_signal_job(orte_jobid_t jobid, int32_t signal); static int pls_slurm_signal_proc(const orte_process_name_t *name, int32_t signal); @@ -81,9 +84,10 @@ static int pls_slurm_start_proc(int argc, char **argv, char **env, /* * Global variable */ -orte_pls_base_module_1_0_0_t orte_pls_slurm_module = { - pls_slurm_launch, +orte_pls_base_module_1_3_0_t orte_pls_slurm_module = { + pls_slurm_launch_job, pls_slurm_terminate_job, + pls_slurm_terminate_orteds, pls_slurm_terminate_proc, pls_slurm_signal_job, pls_slurm_signal_proc, @@ -103,7 +107,7 @@ static pid_t srun_pid = 0; extern char **environ; #endif /* !defined(__WINDOWS__) */ -static int pls_slurm_launch(orte_jobid_t jobid) +static int pls_slurm_launch_job(orte_jobid_t jobid) { opal_list_t nodes, mapping_list; opal_list_item_t *item, *item2; @@ -125,7 +129,14 @@ static int pls_slurm_launch(orte_jobid_t jobid) char **custom_strings; int num_args, i; char *cur_prefix; + opal_list_t daemons; + orte_pls_daemon_info_t *dmn; + /* setup a list that will contain the info for all the daemons + * so we can store it on the registry when done + */ + OBJ_CONSTRUCT(&daemons, opal_list_t); + /* Query the list of nodes allocated and mapped to this job. * We need the entire mapping for a couple of reasons: * - need the prefix to start with. 
@@ -151,6 +162,12 @@ static int pls_slurm_launch(orte_jobid_t jobid) goto cleanup; } + /* setup the orted triggers for passing their launch info */ + if (ORTE_SUCCESS != (rc = orte_smr.init_orted_stage_gates(jobid, num_nodes, NULL, NULL))) { + ORTE_ERROR_LOG(rc); + goto cleanup; + } + /* need integer value for command line parameter */ asprintf(&jobid_string, "%lu", (unsigned long) jobid); @@ -212,7 +229,7 @@ static int pls_slurm_launch(orte_jobid_t jobid) opal_argv_append(&argc, &argv, "--no-daemonize"); /* check for debug flags */ - orte_pls_base_proxy_mca_argv(&argc, &argv); + orte_pls_base_mca_argv(&argc, &argv); /* proxy information */ opal_argv_append(&argc, &argv, "--bootproxy"); @@ -297,7 +314,6 @@ static int pls_slurm_launch(orte_jobid_t jobid) item != opal_list_get_end(&nodes); item = opal_list_get_next(item)) { orte_ras_node_t* node = (orte_ras_node_t*)item; - orte_process_name_t* name; opal_list_t map; size_t num_processes; @@ -351,24 +367,24 @@ static int pls_slurm_launch(orte_jobid_t jobid) } } - /* initialize daemons process name */ - rc = orte_ns.create_process_name(&name, node->node_cellid, 0, vpid); - if (ORTE_SUCCESS != rc) { - ORTE_ERROR_LOG(rc); - goto cleanup; - } - - /* save the daemons name on the node */ - if (ORTE_SUCCESS != - (rc = orte_pls_base_proxy_set_node_name(node, jobid, name))) { + /* record the daemons info for this node */ + dmn = OBJ_NEW(orte_pls_daemon_info_t); + dmn->cell = node->node_cellid; + dmn->nodename = strdup(node->node_name); + if (ORTE_SUCCESS != (rc = orte_ns.create_process_name(&(dmn->name), node->node_cellid, 0, vpid))) { ORTE_ERROR_LOG(rc); goto cleanup; } + opal_list_append(&daemons, &dmn->super); vpid++; - free(name); } + /* store the daemon info on the registry */ + if (ORTE_SUCCESS != (rc = orte_pls_base_store_active_daemons(&daemons, jobid))) { + ORTE_ERROR_LOG(rc); + } + /* setup environment */ env = opal_argv_copy(environ); var = mca_base_param_environ_variable("seed", NULL, NULL); @@ -417,18 +433,71 @@ 
cleanup: OBJ_RELEASE(item); } OBJ_DESTRUCT(&mapping_list); + + while (NULL != (item = opal_list_remove_first(&daemons))) { + OBJ_RELEASE(item); + } + OBJ_DESTRUCT(&daemons); + return rc; } static int pls_slurm_terminate_job(orte_jobid_t jobid) { - if (0 != srun_pid) { - kill(srun_pid, SIGHUP); - /* JMS need appropriate code here to reap */ - srun_pid = 0; + int rc; + opal_list_t daemons; + opal_list_item_t *item; + + /* construct the list of active daemons on this job */ + OBJ_CONSTRUCT(&daemons, opal_list_t); + if (ORTE_SUCCESS != (rc = orte_pls_base_get_active_daemons(&daemons, jobid))) { + ORTE_ERROR_LOG(rc); + goto CLEANUP; } - return ORTE_SUCCESS; + + /* order them to kill their local procs for this job */ + if (ORTE_SUCCESS != (rc = orte_pls_base_orted_kill_local_procs(&daemons, jobid))) { + ORTE_ERROR_LOG(rc); + goto CLEANUP; + } + +CLEANUP: + while (NULL != (item = opal_list_remove_first(&daemons))) { + OBJ_RELEASE(item); + } + OBJ_DESTRUCT(&daemons); + return rc; +} + + +/** +* Terminate the orteds for a given job + */ +static int pls_slurm_terminate_orteds(orte_jobid_t jobid) +{ + int rc; + opal_list_t daemons; + opal_list_item_t *item; + + /* construct the list of active daemons on this job */ + OBJ_CONSTRUCT(&daemons, opal_list_t); + if (ORTE_SUCCESS != (rc = orte_pls_base_get_active_daemons(&daemons, jobid))) { + ORTE_ERROR_LOG(rc); + goto CLEANUP; + } + + /* order them to go away */ + if (ORTE_SUCCESS != (rc = orte_pls_base_orted_exit(&daemons))) { + ORTE_ERROR_LOG(rc); + } + +CLEANUP: + while (NULL != (item = opal_list_remove_first(&daemons))) { + OBJ_RELEASE(item); + } + OBJ_DESTRUCT(&daemons); + return rc; } @@ -438,8 +507,7 @@ static int pls_slurm_terminate_job(orte_jobid_t jobid) */ static int pls_slurm_terminate_proc(const orte_process_name_t *name) { - opal_output(orte_pls_base.pls_output, - "pls:slurm:terminate_proc: not supported"); + opal_output(0, "pls:slurm:terminate_proc: not supported"); return ORTE_ERR_NOT_SUPPORTED; } @@ -461,15 
+529,20 @@ static int pls_slurm_signal_job(orte_jobid_t jobid, int32_t signal) */ static int pls_slurm_signal_proc(const orte_process_name_t *name, int32_t signal) { - return orte_pls_base_proxy_signal_proc(name, signal); + opal_output(0, "pls:slurm:signal_proc: not supported"); + return ORTE_ERR_NOT_SUPPORTED; } static int pls_slurm_finalize(void) { - /* cleanup any pending recvs */ - orte_rml.recv_cancel(ORTE_RML_NAME_ANY, ORTE_RML_TAG_RMGR_CLNT); + int rc; + /* cleanup any pending recvs */ + if (ORTE_SUCCESS != (rc = orte_pls_base_comm_stop())) { + ORTE_ERROR_LOG(rc); + } + return ORTE_SUCCESS; } @@ -486,8 +559,7 @@ static int pls_slurm_start_proc(int argc, char **argv, char **env, srun_pid = fork(); if (-1 == srun_pid) { - opal_output(orte_pls_base.pls_output, - "pls:slurm:start_proc: fork failed"); + opal_output(0, "pls:slurm:start_proc: fork failed"); return ORTE_ERR_IN_ERRNO; } else if (0 == srun_pid) { char *bin_base = NULL, *lib_base = NULL; @@ -556,9 +628,9 @@ static int pls_slurm_start_proc(int argc, char **argv, char **env, cntl-c) don't get sent to srun */ setpgid(0, 0); - opal_output(orte_pls_base.pls_output, - "pls:slurm:start_proc: exec failed"); execve(exec_argv, argv, env); + + opal_output(0, "pls:slurm:start_proc: exec failed"); /* don't return - need to exit - returning would be bad - we're not in the calling process anymore */ exit(1); diff --git a/orte/mca/pls/tm/pls_tm.h b/orte/mca/pls/tm/pls_tm.h index 6ce3a835c5..2ec1d45265 100644 --- a/orte/mca/pls/tm/pls_tm.h +++ b/orte/mca/pls/tm/pls_tm.h @@ -42,7 +42,7 @@ extern "C" { /* Globally exported variables */ ORTE_DECLSPEC extern orte_pls_tm_component_t mca_pls_tm_component; - extern orte_pls_base_module_1_0_0_t orte_pls_tm_module; + extern orte_pls_base_module_t orte_pls_tm_module; #if defined(c_plusplus) || defined(__cplusplus) } diff --git a/orte/mca/pls/tm/pls_tm_component.c b/orte/mca/pls/tm/pls_tm_component.c index 68cfeb6aeb..e4cd7beb27 100644 --- a/orte/mca/pls/tm/pls_tm_component.c +++ 
b/orte/mca/pls/tm/pls_tm_component.c @@ -29,8 +29,13 @@ #include "opal/util/output.h" #include "opal/util/argv.h" #include "orte/orte_constants.h" + +#include "orte/util/proc_info.h" +#include "orte/mca/errmgr/errmgr.h" + #include "orte/mca/pls/pls.h" #include "orte/mca/pls/base/base.h" +#include "orte/mca/pls/base/pls_private.h" #include "pls_tm.h" @@ -47,7 +52,7 @@ const char *mca_pls_tm_component_version_string = */ static int pls_tm_open(void); static int pls_tm_close(void); -static struct orte_pls_base_module_1_0_0_t *pls_tm_init(int *priority); +static orte_pls_base_module_t *pls_tm_init(int *priority); /* @@ -61,9 +66,9 @@ orte_pls_tm_component_t mca_pls_tm_component = { about the component itself */ { - /* Indicate that we are a pls v1.0.0 component (which also + /* Indicate that we are a pls v1.3.0 component (which also implies a specific MCA version) */ - ORTE_PLS_BASE_VERSION_1_0_0, + ORTE_PLS_BASE_VERSION_1_3_0, /* Component name and version */ "tm", @@ -126,12 +131,25 @@ static int pls_tm_close(void) } -static struct orte_pls_base_module_1_0_0_t *pls_tm_init(int *priority) +static orte_pls_base_module_t *pls_tm_init(int *priority) { + int rc; + + /* if we are NOT an HNP, then don't select us */ + if (!orte_process_info.seed) { + return NULL; + } + /* Are we running under a TM job? 
*/ if (NULL != getenv("PBS_ENVIRONMENT") && NULL != getenv("PBS_JOBID")) { + + /* ensure the receive gets posted */ + if (ORTE_SUCCESS != (rc = orte_pls_base_comm_start())) { + ORTE_ERROR_LOG(rc); + } + *priority = mca_pls_tm_component.priority; return &orte_pls_tm_module; } diff --git a/orte/mca/pls/tm/pls_tm_module.c b/orte/mca/pls/tm/pls_tm_module.c index 49e77df52d..a20f66bf16 100644 --- a/orte/mca/pls/tm/pls_tm_module.c +++ b/orte/mca/pls/tm/pls_tm_module.c @@ -24,6 +24,7 @@ */ #include "orte_config.h" +#include "orte/orte_constants.h" #if HAVE_UNISTD_H #include @@ -38,6 +39,7 @@ #include #include "opal/install_dirs.h" +#include "opal/threads/condition.h" #include "opal/event/event.h" #include "opal/util/argv.h" #include "opal/util/output.h" @@ -47,20 +49,23 @@ #include "opal/util/basename.h" #include "opal/mca/base/mca_base_param.h" #include "opal/runtime/opal_progress.h" -#include "orte/orte_constants.h" + #include "orte/orte_types.h" #include "orte/runtime/runtime.h" #include "orte/runtime/orte_wait.h" -#include "orte/mca/rmgr/base/base.h" -#include "orte/mca/rmaps/base/rmaps_base_map.h" #include "orte/mca/pls/pls.h" -#include "orte/mca/pls/base/base.h" #include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/smr/smr_types.h" +#include "orte/mca/smr/smr.h" #include "orte/mca/gpr/gpr.h" #include "orte/mca/sds/base/base.h" #include "orte/mca/rml/rml.h" #include "orte/mca/ns/ns.h" + +/* needs to be cleaned up for ORTE 2.0 */ +#include "orte/mca/rmaps/base/rmaps_private.h" + + +#include "orte/mca/pls/base/pls_private.h" #include "pls_tm.h" @@ -68,8 +73,9 @@ /* * Local functions */ -static int pls_tm_launch(orte_jobid_t jobid); +static int pls_tm_launch_job(orte_jobid_t jobid); static int pls_tm_terminate_job(orte_jobid_t jobid); +static int pls_tm_terminate_orteds(orte_jobid_t jobid); static int pls_tm_terminate_proc(const orte_process_name_t *name); static int pls_tm_signal_job(orte_jobid_t jobid, int32_t signal); static int pls_tm_signal_proc(const 
orte_process_name_t *name, int32_t signal); @@ -77,16 +83,29 @@ static int pls_tm_finalize(void); static int pls_tm_connect(void); static int pls_tm_disconnect(void); +static int pls_tm_query_hostnames(void); static int pls_tm_start_proc(char *nodename, int argc, char **argv, - char **env, tm_event_t *event); + char **env, tm_task_id *task_id, + tm_event_t *event); static int pls_tm_check_path(char *exe, char **env); +/* + * Local variables + */ +/* Resolving TM hostname */ +static char **tm_hostnames = NULL; +static tm_node_id *tm_node_ids = NULL; +static int num_tm_hostnames = 0, num_node_ids = 0; + + + /* * Global variable */ -orte_pls_base_module_1_0_0_t orte_pls_tm_module = { - pls_tm_launch, +orte_pls_base_module_t orte_pls_tm_module = { + pls_tm_launch_job, pls_tm_terminate_job, + pls_tm_terminate_orteds, pls_tm_terminate_proc, pls_tm_signal_job, pls_tm_signal_proc, @@ -97,8 +116,7 @@ orte_pls_base_module_1_0_0_t orte_pls_tm_module = { extern char **environ; #endif /* !defined(__WINDOWS__) */ -static int -pls_tm_launch(orte_jobid_t jobid) +static int pls_tm_launch_job(orte_jobid_t jobid) { opal_list_t mapping; opal_list_item_t *m_item, *n_item; @@ -112,10 +130,15 @@ pls_tm_launch(orte_jobid_t jobid) int argc; int rc; bool connected = false; - int launched = 0, i; + uint launched = 0, i; char *bin_base = NULL, *lib_base = NULL; - tm_event_t *events = NULL; - + tm_event_t *tm_events = NULL; + tm_task_id *tm_task_ids = NULL; + int local_err; + tm_event_t event; + opal_list_t daemons; + orte_pls_daemon_info_t *dmn; + /* Query the list of nodes allocated and mapped to this job. * We need the entire mapping for a couple of reasons: * - need the prefix to start with. @@ -138,7 +161,7 @@ pls_tm_launch(orte_jobid_t jobid) /* * Allocate a range of vpids for the daemons. 
*/ - if (num_nodes == 0) { + if (0 == num_nodes) { return ORTE_ERR_BAD_PARAM; } rc = orte_ns.reserve_range(0, num_nodes, &vpid); @@ -146,9 +169,25 @@ pls_tm_launch(orte_jobid_t jobid) goto cleanup; } + /* setup the orted triggers for passing their launch info */ + if (ORTE_SUCCESS != (rc = orte_smr.init_orted_stage_gates(jobid, num_nodes, NULL, NULL))) { + ORTE_ERROR_LOG(rc); + goto cleanup; + } + + /* setup a list that will contain the info for all the daemons + * so we can store it on the registry when done + */ + OBJ_CONSTRUCT(&daemons, opal_list_t); + /* Allocate a bunch of TM events to use for tm_spawn()ing */ - events = malloc(sizeof(tm_event_t) * num_nodes); - if (NULL == events) { + tm_events = malloc(sizeof(tm_event_t) * num_nodes); + if (NULL == tm_events) { + rc = ORTE_ERR_OUT_OF_RESOURCE; + goto cleanup; + } + tm_task_ids = malloc(sizeof(tm_task_id) * num_nodes); + if (NULL == tm_task_ids) { rc = ORTE_ERR_OUT_OF_RESOURCE; goto cleanup; } @@ -163,7 +202,7 @@ pls_tm_launch(orte_jobid_t jobid) opal_argv_append(&argc, &argv, "--no-daemonize"); /* check for debug flags */ - orte_pls_base_proxy_mca_argv(&argc, &argv); + orte_pls_base_mca_argv(&argc, &argv); /* proxy information */ opal_argv_append(&argc, &argv, "--bootproxy"); @@ -232,6 +271,15 @@ pls_tm_launch(orte_jobid_t jobid) } connected = true; + /* Resolve the TM hostnames and TD node ID's (guarantee that we + don't mix any of these TM events in with the TM spawn events, + so that we can poll for each set of events without interference + from the other */ + rc = pls_tm_query_hostnames(); + if (ORTE_SUCCESS != rc) { + goto cleanup; + } + /* Figure out the basenames for the libdir and bindir. There is a lengthy comment about this in pls_rsh_module.c explaining all the rationale for how / why we're doing this. 
*/ @@ -252,11 +300,10 @@ pls_tm_launch(orte_jobid_t jobid) env = opal_argv_copy(environ); var = mca_base_param_environ_variable("seed",NULL,NULL); opal_setenv(var, "0", true, &env); - + /* If we have a prefix, then modify the PATH and LD_LIBRARY_PATH environment variables. */ if (NULL != map->app->prefix_dir) { - int i; char *newenv; for (i = 0; NULL != env && NULL != env[i]; ++i) { @@ -313,8 +360,18 @@ pls_tm_launch(orte_jobid_t jobid) continue; } + /* new daemon - setup to record its info */ + dmn = OBJ_NEW(orte_pls_daemon_info_t); + dmn->active_job = jobid; + opal_list_append(&daemons, &dmn->super); + /* setup node name */ - argv[node_name_index] = node->node_name; + free(argv[node_name_index]); + argv[node_name_index] = strdup(node->node_name); + + /* record the node name in the daemon struct */ + dmn->cell = node->node_cellid; + dmn->nodename = strdup(node->node_name); /* initialize daemons process name */ rc = orte_ns.create_process_name(&name, node->node_cellid, 0, vpid); @@ -323,6 +380,12 @@ pls_tm_launch(orte_jobid_t jobid) goto cleanup; } + /* save it in the daemon struct */ + if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&(dmn->name), name, ORTE_NAME))) { + ORTE_ERROR_LOG(rc); + goto cleanup; + } + /* setup per-node options */ if (mca_pls_tm_component.debug || mca_pls_tm_component.verbose) { @@ -336,8 +399,9 @@ pls_tm_launch(orte_jobid_t jobid) opal_output(0, "pls:tm: unable to create process name"); return rc; } - argv[proc_name_index] = name_string; - + free(argv[proc_name_index]); + argv[proc_name_index] = strdup(name_string); + /* set the progress engine schedule for this node. 
* if node_slots is set to zero, then we default to * NOT being oversubscribed @@ -359,12 +423,6 @@ pls_tm_launch(orte_jobid_t jobid) opal_setenv(var, "0", true, &env); } free(var); - - /* save the daemons name on the node */ - if (ORTE_SUCCESS != (rc = orte_pls_base_proxy_set_node_name(node,jobid,name))) { - ORTE_ERROR_LOG(rc); - goto cleanup; - } /* exec the daemon */ if (mca_pls_tm_component.debug) { @@ -376,39 +434,48 @@ pls_tm_launch(orte_jobid_t jobid) } rc = pls_tm_start_proc(node->node_name, argc, argv, env, - events + launched); + tm_task_ids + launched, + tm_events + launched); if (ORTE_SUCCESS != rc) { opal_output(0, "pls:tm: start_procs returned error %d", rc); goto cleanup; } launched++; - vpid++; + ++vpid; free(name); + + /* Allow some progress to occur */ opal_event_loop(OPAL_EVLOOP_NONBLOCK); } - + } + if (mca_pls_tm_component.debug) { + opal_output(0, "pls:tm:launch: finished spawning orteds\n"); } - /* loop through all those that are launched and poll for - completion status */ - - for(i = 0; i < launched; i++){ - int ret, local_err; - tm_event_t event; - ret = tm_poll(TM_NULL_EVENT, &event, 1, &local_err); - if (TM_SUCCESS != ret) { - errno = local_err; - opal_output(0, "pls:tm: failed to start a proc error %d", ret); - goto cleanup; - } + /* all done, so store the daemon info on the registry */ + if (ORTE_SUCCESS != (rc = orte_pls_base_store_active_daemons(&daemons, jobid))) { + ORTE_ERROR_LOG(rc); } + /* TM poll for all the spawns */ + for (i = 0; i < launched; ++i) { + rc = tm_poll(TM_NULL_EVENT, &event, 1, &local_err); + if (TM_SUCCESS != rc) { + errno = local_err; + opal_output(0, "pls:tm: failed to poll for a spawned proc, return status = %d", rc); + return ORTE_ERR_IN_ERRNO; + } + } + cleanup: if (connected) { pls_tm_disconnect(); } - if (NULL != events) { - free(events); + if (NULL != tm_events) { + free(tm_events); + } + if (NULL != tm_task_ids) { + free(tm_task_ids); } while (NULL != (m_item = opal_list_remove_first(&mapping))) { @@ 
-422,58 +489,149 @@ pls_tm_launch(orte_jobid_t jobid) free(bin_base); } + /* deconstruct the daemon list */ + while (NULL != (m_item = opal_list_remove_first(&daemons))) { + OBJ_RELEASE(m_item); + } + OBJ_DESTRUCT(&daemons); + + if (mca_pls_tm_component.debug) { + opal_output(0, "pls:tm:launch: finished\n"); + } return rc; } -static int -pls_tm_terminate_job(orte_jobid_t jobid) +static int pls_tm_terminate_job(orte_jobid_t jobid) { - return orte_pls_base_proxy_terminate_job(jobid); + int rc; + opal_list_t daemons; + opal_list_item_t *item; + + /* construct the list of active daemons on this job */ + OBJ_CONSTRUCT(&daemons, opal_list_t); + if (ORTE_SUCCESS != (rc = orte_pls_base_get_active_daemons(&daemons, jobid))) { + ORTE_ERROR_LOG(rc); + goto CLEANUP; + } + + /* order them to kill their local procs for this job */ + if (ORTE_SUCCESS != (rc = orte_pls_base_orted_kill_local_procs(&daemons, jobid))) { + ORTE_ERROR_LOG(rc); + goto CLEANUP; + } + +CLEANUP: + while (NULL != (item = opal_list_remove_first(&daemons))) { + OBJ_RELEASE(item); + } + OBJ_DESTRUCT(&daemons); + return rc; } +/** + * Terminate the orteds for a given job + */ +int pls_tm_terminate_orteds(orte_jobid_t jobid) +{ + int rc; + opal_list_t daemons; + opal_list_item_t *item; + + /* construct the list of active daemons on this job */ + OBJ_CONSTRUCT(&daemons, opal_list_t); + if (ORTE_SUCCESS != (rc = orte_pls_base_get_active_daemons(&daemons, jobid))) { + ORTE_ERROR_LOG(rc); + goto CLEANUP; + } + + /* now tell them to die! 
*/ + if (ORTE_SUCCESS != (rc = orte_pls_base_orted_exit(&daemons))) { + ORTE_ERROR_LOG(rc); + } + +CLEANUP: + while (NULL != (item = opal_list_remove_first(&daemons))) { + OBJ_RELEASE(item); + } + OBJ_DESTRUCT(&daemons); + return rc; +} + /* * TM can't kill individual processes -- PBS will kill the entire job */ -static int -pls_tm_terminate_proc(const orte_process_name_t *name) +static int pls_tm_terminate_proc(const orte_process_name_t *name) { - opal_output(orte_pls_base.pls_output, - "pls:tm:terminate_proc: not supported"); + if (mca_pls_tm_component.debug) { + opal_output(0, "pls:tm:terminate_proc: not supported"); + } return ORTE_ERR_NOT_SUPPORTED; } -static int -pls_tm_signal_job(orte_jobid_t jobid, int32_t signal) +static int pls_tm_signal_job(orte_jobid_t jobid, int32_t signal) { - return orte_pls_base_proxy_signal_job(jobid, signal); + int rc; + opal_list_t daemons; + opal_list_item_t *item; + + /* construct the list of active daemons on this job */ + OBJ_CONSTRUCT(&daemons, opal_list_t); + if (ORTE_SUCCESS != (rc = orte_pls_base_get_active_daemons(&daemons, jobid))) { + ORTE_ERROR_LOG(rc); + OBJ_DESTRUCT(&daemons); + return rc; + } + + /* order them to pass this signal to their local procs */ + if (ORTE_SUCCESS != (rc = orte_pls_base_orted_signal_local_procs(&daemons, signal))) { + ORTE_ERROR_LOG(rc); + } + + while (NULL != (item = opal_list_remove_first(&daemons))) { + OBJ_RELEASE(item); + } + OBJ_DESTRUCT(&daemons); + return rc; } -static int -pls_tm_signal_proc(const orte_process_name_t *name, int32_t signal) +static int pls_tm_signal_proc(const orte_process_name_t *name, int32_t signal) { - return orte_pls_base_proxy_signal_proc(name, signal); + return ORTE_ERR_NOT_IMPLEMENTED; } /* * Free stuff */ -static int -pls_tm_finalize(void) +static int pls_tm_finalize(void) { + int rc; + /* cleanup any pending recvs */ - orte_rml.recv_cancel(ORTE_RML_NAME_ANY, ORTE_RML_TAG_RMGR_CLNT); + if (ORTE_SUCCESS != (rc = orte_pls_base_comm_stop())) { + 
ORTE_ERROR_LOG(rc); + } + + if (NULL != tm_hostnames) { + opal_argv_free(tm_hostnames); + tm_hostnames = NULL; + num_tm_hostnames = 0; + } + if (NULL != tm_node_ids) { + free(tm_node_ids); + tm_node_ids = NULL; + num_node_ids = 0; + } return ORTE_SUCCESS; } -static int -pls_tm_connect(void) +static int pls_tm_connect(void) { int ret; struct tm_roots tm_root; @@ -499,27 +657,19 @@ pls_tm_connect(void) } -static int -pls_tm_disconnect(void) +static int pls_tm_disconnect(void) { tm_finalize(); return ORTE_SUCCESS; } -static char **tm_hostnames = NULL; -static tm_node_id *tm_node_ids = NULL; -static int num_tm_hostnames, num_node_ids; - - -/* we don't call this anymore */ /* * For a given TM node ID, get the string hostname corresponding to * it. */ -static char* -get_tm_hostname(tm_node_id node) +static char *get_tm_hostname(tm_node_id node) { char *hostname; char buffer[256]; @@ -531,6 +681,7 @@ get_tm_hostname(tm_node_id node) ret = tm_rescinfo(node, buffer, sizeof(buffer) - 1, &event); if (TM_SUCCESS != ret) { + opal_output(0, "tm_rescinfo returned %d\n", ret); return NULL; } @@ -538,6 +689,7 @@ get_tm_hostname(tm_node_id node) ret = tm_poll(TM_NULL_EVENT, &event, 1, &local_errno); if (TM_SUCCESS != ret) { + opal_output(0, "tm_poll returned %d\n", ret); return NULL; } @@ -548,6 +700,7 @@ get_tm_hostname(tm_node_id node) buffer[sizeof(buffer) - 1] = '\0'; argv = opal_argv_split(buffer, ' '); if (NULL == argv) { + opal_output(0, "opal_argv_split failed\n"); return NULL; } hostname = strdup(argv[1]); @@ -559,9 +712,7 @@ get_tm_hostname(tm_node_id node) } -/* we don't call this anymore!*/ -static int -query_tm_hostnames(void) +static int pls_tm_query_hostnames(void) { char *h; int i, ret; @@ -584,6 +735,10 @@ query_tm_hostnames(void) num_tm_hostnames = 0; for (i = 0; i < num_node_ids; ++i) { h = get_tm_hostname(tm_node_ids[i]); + if (NULL == h) { + opal_output(0, "get_tm_hostname returned NULL"); + return ORTE_ERROR; + } opal_argv_append(&num_tm_hostnames, 
&tm_hostnames, h); free(h); } @@ -593,27 +748,23 @@ query_tm_hostnames(void) return ORTE_SUCCESS; } -/* we don't call this anymore! */ -static int -do_tm_resolve(char *hostname, tm_node_id *tnodeid) +static int do_tm_resolve(char *hostname, tm_node_id *tnodeid) { int i, ret; /* Have we already queried TM for all the node info? */ if (NULL == tm_hostnames) { - ret = query_tm_hostnames(); - if (ORTE_SUCCESS != ret) { - return ret; - } + return ORTE_ERR_NOT_FOUND; } /* Find the TM ID of the hostname that we're looking for */ for (i = 0; i < num_tm_hostnames; ++i) { if (0 == strcmp(hostname, tm_hostnames[i])) { *tnodeid = tm_node_ids[i]; - opal_output(orte_pls_base.pls_output, - "pls:tm:launch: resolved host %s to node ID %d", - hostname, tm_node_ids[i]); + if (mca_pls_tm_component.debug) { + opal_output(0, "pls:tm:launch: resolved host %s to node ID %d", + hostname, tm_node_ids[i]); + } break; } } @@ -629,20 +780,22 @@ do_tm_resolve(char *hostname, tm_node_id *tnodeid) } -static int -pls_tm_start_proc(char *nodename, int argc, char **argv, char **env, - tm_event_t *event) +static int pls_tm_start_proc(char *nodename, int argc, char **argv, char **env, + tm_task_id *task_id, tm_event_t *event) { int ret; tm_node_id node_id; - tm_task_id task_id; /* get the tm node id for this node */ ret = do_tm_resolve(nodename, &node_id); - if (ORTE_SUCCESS != ret) return ret; + if (ORTE_SUCCESS != ret) { + return ret; + } - ret = tm_spawn(argc, argv, env, node_id, &task_id, event); - if (TM_SUCCESS != ret) return ORTE_ERROR; + ret = tm_spawn(argc, argv, env, node_id, task_id, event); + if (TM_SUCCESS != ret) { + return ORTE_ERROR; + } return ORTE_SUCCESS; } diff --git a/orte/mca/pls/xgrid/.ompi_ignore b/orte/mca/pls/xgrid/.ompi_ignore new file mode 100644 index 0000000000..e69de29bb2 diff --git a/orte/mca/pls/xgrid/src/pls_xgrid_component.m b/orte/mca/pls/xgrid/src/pls_xgrid_component.m index 969e9b5759..77affc461d 100644 --- a/orte/mca/pls/xgrid/src/pls_xgrid_component.m +++ 
b/orte/mca/pls/xgrid/src/pls_xgrid_component.m @@ -31,6 +31,8 @@ #import "opal/util/argv.h" #import "opal/util/path.h" #import "opal/util/basename.h" + +#import "orte/util/proc_info.h" #import "orte/mca/pls/pls.h" #import "orte/mca/pls/base/base.h" #import "opal/mca/base/mca_base_param.h" @@ -119,6 +121,11 @@ orte_pls_xgrid_component_init(int *priority) { char *string; int ret, val, param; + + /* if we are NOT an HNP, then don't select us */ + if (!orte_process_info.seed) { + return NULL; + } if (NULL == getenv("XGRID_CONTROLLER_HOSTNAME") || NULL == getenv("XGRID_CONTROLLER_PASSWORD")) { diff --git a/orte/mca/ras/base/Makefile.am b/orte/mca/ras/base/Makefile.am index b660bd627f..ab760e39e9 100644 --- a/orte/mca/ras/base/Makefile.am +++ b/orte/mca/ras/base/Makefile.am @@ -18,16 +18,17 @@ headers += \ base/base.h \ - base/ras_base_node.h + base/ras_private.h libmca_ras_la_SOURCES += \ base/ras_base_alloc.c \ base/ras_base_allocate.c \ base/ras_base_close.c \ base/ras_base_find_available.c \ - base/ras_base_node.h \ base/ras_base_node.c \ + base/ras_base_no_ops.c \ base/ras_base_open.c \ + base/ras_base_receive.c \ base/data_type_support/ras_data_type_compare_fns.c \ base/data_type_support/ras_data_type_copy_fns.c \ base/data_type_support/ras_data_type_packing_fns.c \ diff --git a/orte/mca/ras/base/base.h b/orte/mca/ras/base/base.h index f6fb2cca44..45b1711bba 100644 --- a/orte/mca/ras/base/base.h +++ b/orte/mca/ras/base/base.h @@ -56,6 +56,8 @@ struct orte_ras_base_cmp_t { int priority; }; typedef struct orte_ras_base_cmp_t orte_ras_base_cmp_t; +/** Class declaration */ +ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_ras_base_cmp_t); /* @@ -63,19 +65,13 @@ typedef struct orte_ras_base_cmp_t orte_ras_base_cmp_t; */ ORTE_DECLSPEC int orte_ras_base_open(void); ORTE_DECLSPEC int orte_ras_base_find_available(void); -ORTE_DECLSPEC int orte_ras_base_allocate(orte_jobid_t job, - orte_ras_base_module_t **m); ORTE_DECLSPEC int orte_ras_base_finalize(void); ORTE_DECLSPEC int 
orte_ras_base_close(void); -ORTE_DECLSPEC int orte_ras_base_allocate_nodes(orte_jobid_t jobid, - opal_list_t* nodes); /* * globals that might be needed */ - - typedef struct orte_ras_base_t { int ras_output; opal_list_t ras_opened; @@ -87,22 +83,6 @@ typedef struct orte_ras_base_t { ORTE_DECLSPEC extern orte_ras_base_t orte_ras_base; -/** Class declaration */ -ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_ras_base_cmp_t); - - -/** Local data type functions */ -int orte_ras_base_copy_node(orte_ras_node_t **dest, orte_ras_node_t *src, orte_data_type_t type); -int orte_ras_base_compare_node(orte_ras_node_t *value1, orte_ras_node_t *value2, orte_data_type_t type); -int orte_ras_base_pack_node(orte_buffer_t *buffer, void *src, - orte_std_cntr_t num_vals, orte_data_type_t type); -int orte_ras_base_print_node(char **output, char *prefix, orte_ras_node_t *src, orte_data_type_t type); -void orte_ras_base_std_obj_release(orte_data_value_t *value); -int orte_ras_base_size_node(size_t *size, orte_ras_node_t *src, orte_data_type_t type); -int orte_ras_base_unpack_node(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t *num_vals, orte_data_type_t type); - - /* * external API functions will be documented in the mca/ns/ns.h file */ diff --git a/orte/mca/ras/base/data_type_support/ras_data_type_compare_fns.c b/orte/mca/ras/base/data_type_support/ras_data_type_compare_fns.c index bd81a83bc0..788060818a 100644 --- a/orte/mca/ras/base/data_type_support/ras_data_type_compare_fns.c +++ b/orte/mca/ras/base/data_type_support/ras_data_type_compare_fns.c @@ -24,7 +24,7 @@ #include "orte/mca/errmgr/errmgr.h" #include "orte/dss/dss_internal.h" -#include "orte/mca/ras/base/base.h" +#include "orte/mca/ras/base/ras_private.h" /** * RAS NODE diff --git a/orte/mca/ras/base/data_type_support/ras_data_type_copy_fns.c b/orte/mca/ras/base/data_type_support/ras_data_type_copy_fns.c index d7db293cd7..1fad4e2a54 100644 --- a/orte/mca/ras/base/data_type_support/ras_data_type_copy_fns.c +++ 
b/orte/mca/ras/base/data_type_support/ras_data_type_copy_fns.c @@ -29,7 +29,7 @@ #include "orte/mca/errmgr/errmgr.h" #include "orte/dss/dss_internal.h" -#include "orte/mca/ras/base/base.h" +#include "orte/mca/ras/base/ras_private.h" /** * RAS NODE diff --git a/orte/mca/ras/base/data_type_support/ras_data_type_packing_fns.c b/orte/mca/ras/base/data_type_support/ras_data_type_packing_fns.c index 9e089313b2..98c063fe97 100644 --- a/orte/mca/ras/base/data_type_support/ras_data_type_packing_fns.c +++ b/orte/mca/ras/base/data_type_support/ras_data_type_packing_fns.c @@ -28,7 +28,7 @@ #include "orte/mca/errmgr/errmgr.h" #include "orte/dss/dss_internal.h" -#include "orte/mca/ras/base/base.h" +#include "orte/mca/ras/base/ras_private.h" /* * RAS NODE diff --git a/orte/mca/ras/base/data_type_support/ras_data_type_print_fns.c b/orte/mca/ras/base/data_type_support/ras_data_type_print_fns.c index 04450f4b4b..e50e1d87c6 100644 --- a/orte/mca/ras/base/data_type_support/ras_data_type_print_fns.c +++ b/orte/mca/ras/base/data_type_support/ras_data_type_print_fns.c @@ -29,7 +29,7 @@ #include "orte/mca/errmgr/errmgr.h" #include "orte/dss/dss_internal.h" -#include "orte/mca/ras/base/base.h" +#include "orte/mca/ras/base/ras_private.h" /* * RAS NODE diff --git a/orte/mca/ras/base/data_type_support/ras_data_type_release_fns.c b/orte/mca/ras/base/data_type_support/ras_data_type_release_fns.c index e0c9ba072a..9a718de0c2 100644 --- a/orte/mca/ras/base/data_type_support/ras_data_type_release_fns.c +++ b/orte/mca/ras/base/data_type_support/ras_data_type_release_fns.c @@ -24,7 +24,7 @@ #include "orte/mca/errmgr/errmgr.h" #include "orte/dss/dss_internal.h" -#include "orte/mca/ras/base/base.h" +#include "orte/mca/ras/base/ras_private.h" /* * STANDARD OBJECT RELEASE diff --git a/orte/mca/ras/base/data_type_support/ras_data_type_size_fns.c b/orte/mca/ras/base/data_type_support/ras_data_type_size_fns.c index cd29272d7d..6a4cc3f654 100644 --- 
a/orte/mca/ras/base/data_type_support/ras_data_type_size_fns.c +++ b/orte/mca/ras/base/data_type_support/ras_data_type_size_fns.c @@ -27,7 +27,7 @@ #include "orte/mca/errmgr/errmgr.h" #include "orte/dss/dss_internal.h" -#include "orte/mca/ras/base/base.h" +#include "orte/mca/ras/base/ras_private.h" /* * RAS NODE diff --git a/orte/mca/ras/base/data_type_support/ras_data_type_unpacking_fns.c b/orte/mca/ras/base/data_type_support/ras_data_type_unpacking_fns.c index cd93077cfc..993c204865 100644 --- a/orte/mca/ras/base/data_type_support/ras_data_type_unpacking_fns.c +++ b/orte/mca/ras/base/data_type_support/ras_data_type_unpacking_fns.c @@ -30,7 +30,7 @@ #include "orte/mca/errmgr/errmgr.h" #include "orte/dss/dss_internal.h" -#include "orte/mca/ras/base/base.h" +#include "orte/mca/ras/base/ras_private.h" /* * RAS NODE diff --git a/orte/mca/ras/base/ras_base_alloc.c b/orte/mca/ras/base/ras_base_alloc.c index 13c54580bf..3eaf3564b6 100644 --- a/orte/mca/ras/base/ras_base_alloc.c +++ b/orte/mca/ras/base/ras_base_alloc.c @@ -23,10 +23,10 @@ #include "opal/mca/mca.h" #include "opal/mca/base/base.h" #include "orte/mca/ras/base/base.h" -#include "orte/mca/ras/base/ras_base_node.h" -#include "orte/mca/rmgr/base/base.h" #include "orte/mca/errmgr/errmgr.h" +#include "orte/mca/ras/base/ras_private.h" + /* * Mark nodes as allocated on the registry */ diff --git a/orte/mca/ras/base/ras_base_allocate.c b/orte/mca/ras/base/ras_base_allocate.c index 0244f9022d..417f591282 100644 --- a/orte/mca/ras/base/ras_base_allocate.c +++ b/orte/mca/ras/base/ras_base_allocate.c @@ -22,17 +22,15 @@ #include "opal/mca/base/base.h" #include "opal/util/output.h" #include "orte/orte_constants.h" -#include "orte/mca/ras/base/base.h" -#include "orte/mca/ras/base/ras_base_node.h" #include "orte/mca/errmgr/errmgr.h" +#include "orte/mca/ras/base/ras_private.h" /* * Function for selecting one component from all those that are * available. 
*/ -int orte_ras_base_allocate(orte_jobid_t jobid, - orte_ras_base_module_t **module) +int orte_ras_base_allocate(orte_jobid_t jobid) { int ret; opal_list_item_t *item; @@ -60,8 +58,8 @@ int orte_ras_base_allocate(orte_jobid_t jobid, "orte:ras:base:allocate: attemping to allocate using module: %s", cmp->component->ras_version.mca_component_name); - if (NULL != cmp->module->allocate) { - ret = cmp->module->allocate(jobid); + if (NULL != cmp->module->allocate_job) { + ret = cmp->module->allocate_job(jobid); if (ORTE_SUCCESS == ret) { bool empty; @@ -78,7 +76,6 @@ int orte_ras_base_allocate(orte_jobid_t jobid, opal_output(orte_ras_base.ras_output, "orte:ras:base:allocate: found good module: %s", cmp->component->ras_version.mca_component_name); - *module = cmp->module; return ORTE_SUCCESS; } } @@ -93,3 +90,9 @@ int orte_ras_base_allocate(orte_jobid_t jobid, ORTE_ERROR_LOG(ret); return ret; } + +int orte_ras_base_deallocate(orte_jobid_t job) +{ + return ORTE_SUCCESS; +} + diff --git a/orte/mca/ras/base/ras_base_close.c b/orte/mca/ras/base/ras_base_close.c index ccbea98715..b9ac96a6ea 100644 --- a/orte/mca/ras/base/ras_base_close.c +++ b/orte/mca/ras/base/ras_base_close.c @@ -31,8 +31,8 @@ int orte_ras_base_finalize(void) { opal_list_item_t* item; - /* Finalize all available modules */ if (orte_ras_base.ras_available_valid) { + /* Finalize all available modules */ while (NULL != (item = opal_list_remove_first(&orte_ras_base.ras_available))) { orte_ras_base_cmp_t* cmp = (orte_ras_base_cmp_t*)item; @@ -48,11 +48,13 @@ int orte_ras_base_finalize(void) int orte_ras_base_close(void) { - /* Close all remaining available components (may be one if this is a - Open RTE program, or [possibly] multiple if this is ompi_info) */ + if (orte_ras_base.ras_opened_valid) { + /* Close all remaining available components (may be one if this is a + Open RTE program, or [possibly] multiple if this is ompi_info) */ - mca_base_components_close(orte_ras_base.ras_output, - 
&orte_ras_base.ras_opened, NULL); + mca_base_components_close(orte_ras_base.ras_output, + &orte_ras_base.ras_opened, NULL); + } return ORTE_SUCCESS; } diff --git a/orte/mca/ras/base/ras_base_find_available.c b/orte/mca/ras/base/ras_base_find_available.c index 57b5acdb24..04dd046b3c 100644 --- a/orte/mca/ras/base/ras_base_find_available.c +++ b/orte/mca/ras/base/ras_base_find_available.c @@ -23,6 +23,7 @@ #include "opal/mca/mca.h" #include "opal/mca/base/base.h" #include "opal/util/output.h" + #include "orte/mca/ras/base/base.h" @@ -55,48 +56,52 @@ int orte_ras_base_find_available(void) int priority; orte_ras_base_cmp_t *cmp; - OBJ_CONSTRUCT(&orte_ras_base.ras_available, opal_list_t); - orte_ras_base.ras_available_valid = true; + orte_ras_base.ras_available_valid = false; + + if (orte_ras_base.ras_opened_valid) { + OBJ_CONSTRUCT(&orte_ras_base.ras_available, opal_list_t); + orte_ras_base.ras_available_valid = true; - for (item = opal_list_get_first(&orte_ras_base.ras_opened); - opal_list_get_end(&orte_ras_base.ras_opened) != item; - item = opal_list_get_next(item)) { - cli = (mca_base_component_list_item_t *) item; - component = (orte_ras_base_component_t *) cli->cli_component; - opal_output(orte_ras_base.ras_output, - "orte:ras:base:open: querying component %s", - component->ras_version.mca_component_name); - - /* Call the component's init function and see if it wants to be - selected */ - - module = component->ras_init(&priority); - - /* If we got a non-NULL module back, then the component wants - to be considered for selection */ - - if (NULL != module) { + for (item = opal_list_get_first(&orte_ras_base.ras_opened); + opal_list_get_end(&orte_ras_base.ras_opened) != item; + item = opal_list_get_next(item)) { + cli = (mca_base_component_list_item_t *) item; + component = (orte_ras_base_component_t *) cli->cli_component; opal_output(orte_ras_base.ras_output, - "orte:ras:base:open: component %s returns priority %d", - component->ras_version.mca_component_name, - 
priority); - - cmp = OBJ_NEW(orte_ras_base_cmp_t); - cmp->component = component; - cmp->module = module; - cmp->priority = priority; - - opal_list_append(&orte_ras_base.ras_available, &cmp->super); - } else { - opal_output(orte_ras_base.ras_output, - "orte:ras:base:open: component %s does NOT want to be considered for selection", + "orte:ras:base:open: querying component %s", component->ras_version.mca_component_name); + + /* Call the component's init function and see if it wants to be + selected */ + + module = component->ras_init(&priority); + + /* If we got a non-NULL module back, then the component wants + to be considered for selection */ + + if (NULL != module) { + opal_output(orte_ras_base.ras_output, + "orte:ras:base:open: component %s returns priority %d", + component->ras_version.mca_component_name, + priority); + + cmp = OBJ_NEW(orte_ras_base_cmp_t); + cmp->component = component; + cmp->module = module; + cmp->priority = priority; + + opal_list_append(&orte_ras_base.ras_available, &cmp->super); + } else { + opal_output(orte_ras_base.ras_output, + "orte:ras:base:open: component %s does NOT want to be considered for selection", + component->ras_version.mca_component_name); + } } + + /* Sort the resulting available list in priority order */ + opal_list_sort(&orte_ras_base.ras_available, compare); } - - /* Sort the resulting available list in priority order */ - opal_list_sort(&orte_ras_base.ras_available, compare); - + return ORTE_SUCCESS; } diff --git a/orte/mca/ras/base/ras_base_no_ops.c b/orte/mca/ras/base/ras_base_no_ops.c new file mode 100644 index 0000000000..18e1826d3a --- /dev/null +++ b/orte/mca/ras/base/ras_base_no_ops.c @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. 
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "orte_config.h" + +#include "opal/mca/mca.h" +#include "opal/mca/base/base.h" +#include "opal/util/output.h" +#include "orte/orte_constants.h" +#include "orte/mca/errmgr/errmgr.h" + +#include "orte/mca/ras/base/ras_private.h" + +int orte_ras_base_allocate_no_op(orte_jobid_t jobid) +{ + return ORTE_ERR_NOT_SUPPORTED; +} + +int orte_ras_base_node_insert_no_op(opal_list_t *list) +{ + return ORTE_ERR_NOT_SUPPORTED; +} + +int orte_ras_base_node_query_no_op(opal_list_t *list) +{ + return ORTE_ERR_NOT_SUPPORTED; +} + +int orte_ras_base_node_query_alloc_no_op(opal_list_t* list, orte_jobid_t job) +{ + return ORTE_ERR_NOT_SUPPORTED; +} + +orte_ras_node_t* orte_ras_base_node_lookup_no_op(orte_cellid_t cell, const char* nodename) +{ + return NULL; +} + +int orte_ras_base_deallocate_no_op(orte_jobid_t job) +{ + return ORTE_ERR_NOT_SUPPORTED; +} + diff --git a/orte/mca/ras/base/ras_base_node.c b/orte/mca/ras/base/ras_base_node.c index 99b313ffd6..39793aec27 100644 --- a/orte/mca/ras/base/ras_base_node.c +++ b/orte/mca/ras/base/ras_base_node.c @@ -28,7 +28,7 @@ #include "orte/mca/smr/smr_types.h" #include "orte/mca/gpr/gpr.h" #include "orte/mca/ns/ns.h" -#include "orte/mca/ras/base/ras_base_node.h" +#include "orte/mca/ras/base/ras_private.h" static void orte_ras_base_node_construct(orte_ras_node_t* node) { @@ -335,7 +335,19 @@ int orte_ras_base_node_query_alloc(opal_list_t* nodes, orte_jobid_t jobid) continue; } } - opal_list_append(nodes, &node->super); + /* check to see if any slots were reserved on this node for us + * The "get" command will return data from ALL nodes on the node + * segment. 
We ONLY want to include here nodes that are assigned + * to the specified job - i.e., nodes that have a node_slots_alloc_key + * for this jobid. If that is the case, then the node_slots_alloc will be + * set to a value greater than 0 + */ + if (0 < node->node_slots_alloc) { + opal_list_append(nodes, &node->super); + } else { + /* no slots were allocated to us on this node */ + OBJ_RELEASE(node); + } OBJ_RELEASE(value); } diff --git a/orte/mca/ras/base/ras_base_node.h b/orte/mca/ras/base/ras_base_node.h deleted file mode 100644 index 328fceb407..0000000000 --- a/orte/mca/ras/base/ras_base_node.h +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file */ - -#ifndef ORTE_RAS_BASE_NODE_H -#define ORTE_RAS_BASE_NODE_H - -#include "orte/orte_types.h" -#include "orte/mca/smr/smr_types.h" -#include "orte/mca/rmgr/rmgr_types.h" -#include "orte/mca/ras/ras.h" - -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif - -/* - * Query the registry for all available nodes - */ -ORTE_DECLSPEC int orte_ras_base_node_query(opal_list_t*); - -/* - * Query the registry for a specific node - */ -ORTE_DECLSPEC orte_ras_node_t* orte_ras_base_node_lookup(orte_cellid_t, const char* nodename); - -/** - * Query the registry for all nodes allocated to a specific job - */ -ORTE_DECLSPEC int orte_ras_base_node_query_alloc(opal_list_t*, orte_jobid_t); - -/** - * Add the specified node definitions to the registry - */ -ORTE_DECLSPEC int orte_ras_base_node_insert(opal_list_t*); - -/** - * Delete the specified nodes from the registry - */ -ORTE_DECLSPEC int orte_ras_base_node_delete(opal_list_t*); - -/** - * Assign the allocated slots on the specified nodes to the - * indicated jobid. 
- */ -ORTE_DECLSPEC int orte_ras_base_node_assign(opal_list_t*, orte_jobid_t); - -/** - * Check to see if the node segment is empty - */ -ORTE_DECLSPEC int orte_ras_base_node_segment_empty(bool *empty); - - -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif -#endif diff --git a/orte/mca/ras/base/ras_base_open.c b/orte/mca/ras/base/ras_base_open.c index 861b12c3c9..3cb73d6809 100644 --- a/orte/mca/ras/base/ras_base_open.c +++ b/orte/mca/ras/base/ras_base_open.c @@ -27,6 +27,8 @@ #include "orte/dss/dss.h" #include "orte/mca/errmgr/errmgr.h" + +#include "orte/mca/ras/base/ras_private.h" #include "orte/mca/ras/base/base.h" @@ -42,7 +44,26 @@ /* * Global variables */ -orte_ras_base_module_t orte_ras; +orte_ras_base_module_t orte_ras = { + orte_ras_base_allocate, + orte_ras_base_node_insert, + orte_ras_base_node_query, + orte_ras_base_node_query_alloc, + orte_ras_base_node_lookup, + orte_ras_base_deallocate, + orte_ras_base_finalize +}; + +orte_ras_base_module_t orte_ras_no_op = { + orte_ras_base_allocate_no_op, + orte_ras_base_node_insert_no_op, + orte_ras_base_node_query_no_op, + orte_ras_base_node_query_alloc_no_op, + orte_ras_base_node_lookup_no_op, + orte_ras_base_deallocate_no_op, + orte_ras_base_finalize +}; + orte_ras_base_t orte_ras_base; @@ -52,8 +73,9 @@ orte_ras_base_t orte_ras_base; */ int orte_ras_base_open(void) { - int value, rc; + int value, rc, param; orte_data_type_t tmp; + char *requested; /* Debugging / verbose output */ @@ -87,8 +109,28 @@ int orte_ras_base_open(void) return rc; } - /* Open up all available components */ + /* Some systems do not want any RAS support. In those cases, + * memory consumption is also an issue. For those systems, we + * avoid opening the RAS components by checking for a directive + * to use the "null" component. 
+ */ + param = mca_base_param_reg_string_name("ras", NULL, NULL, + false, false, NULL, NULL); + if (ORTE_ERROR == mca_base_param_lookup_string(param, &requested)) { + return ORTE_ERROR; + } + if (NULL != requested && 0 == strcmp(requested, "null")) { + /* the user has specifically requested that we use the "null" + * component. In this case, that means we do NOT open any + * components, and we simply use the no-op module we have + * already defined above + */ + orte_ras_base.ras_opened_valid = false; + orte_ras = orte_ras_no_op; /* use the no_op module */ + return ORTE_SUCCESS; + } + /* Open up all available components */ if (ORTE_SUCCESS != mca_base_components_open("ras", orte_ras_base.ras_output, mca_ras_base_static_components, diff --git a/orte/mca/ras/base/ras_base_receive.c b/orte/mca/ras/base/ras_base_receive.c new file mode 100644 index 0000000000..84c62e74a0 --- /dev/null +++ b/orte/mca/ras/base/ras_base_receive.c @@ -0,0 +1,131 @@ +/* -*- C -*- + * + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +/** @file: + * + */ + +/* + * includes + */ +#include "orte_config.h" + +#include "orte/orte_constants.h" +#include "orte/orte_types.h" + +#include "opal/util/output.h" +#include "opal/mca/mca.h" +#include "opal/mca/base/mca_base_param.h" + +#include "orte/dss/dss.h" +#include "orte/util/proc_info.h" +#include "orte/mca/errmgr/errmgr.h" +#include "orte/mca/rml/rml.h" + +#include "orte/mca/ras/base/ras_private.h" + +static bool recv_issued=false; +/* Post the persistent ORTE_RML_TAG_RAS receive once. Returns ORTE_SUCCESS if it was already posted, otherwise the recv_buffer_nb result; recv_issued is set only when the post succeeds so a failed attempt is not mistaken for an active receive. */ +int orte_ras_base_comm_start(void) +{ + int rc; + + if (recv_issued) { + return ORTE_SUCCESS; + } + + if (ORTE_SUCCESS != (rc = orte_rml.recv_buffer_nb(ORTE_RML_NAME_ANY, + ORTE_RML_TAG_RAS, + ORTE_RML_PERSISTENT, + orte_ras_base_recv, + NULL))) { + ORTE_ERROR_LOG(rc); + } else { + recv_issued = true; + } + return rc; +} + + + +/* + * handle message from proxies + * NOTE: The incoming buffer "buffer" is OBJ_RELEASED by the calling program. + * DO NOT RELEASE THIS BUFFER IN THIS CODE + */ + +void orte_ras_base_recv(int status, orte_process_name_t* sender, + orte_buffer_t* buffer, orte_rml_tag_t tag, + void* cbdata) +{ + orte_buffer_t answer; + orte_ras_cmd_flag_t command; + orte_std_cntr_t count; + orte_jobid_t job; + int rc; + + count = 1; + if (ORTE_SUCCESS != (rc = orte_dss.unpack(buffer, &command, &count, ORTE_RAS_CMD))) { + ORTE_ERROR_LOG(rc); + return; + } + + OBJ_CONSTRUCT(&answer, orte_buffer_t); + + if (ORTE_SUCCESS != (rc = orte_dss.pack(&answer, &command, 1, ORTE_RAS_CMD))) { + ORTE_ERROR_LOG(rc); + } + + switch (command) { + case ORTE_RAS_ALLOCATE_CMD: + count = 1; + if (ORTE_SUCCESS != (rc = orte_dss.unpack(buffer, &job, &count, ORTE_JOBID))) { + ORTE_ERROR_LOG(rc); + goto
SEND_ANSWER; + } + + if (ORTE_SUCCESS != (rc = orte_ras.deallocate_job(job))) { + ORTE_ERROR_LOG(rc); + } + break; + + default: + ORTE_ERROR_LOG(ORTE_ERR_VALUE_OUT_OF_BOUNDS); + } + +SEND_ANSWER: /* send the answer */ + if (0 > orte_rml.send_buffer(sender, &answer, tag, 0)) { + ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); + } + + /* cleanup */ + OBJ_DESTRUCT(&answer); +} + diff --git a/orte/mca/ras/base/ras_private.h b/orte/mca/ras/base/ras_private.h new file mode 100644 index 0000000000..4feabc3c39 --- /dev/null +++ b/orte/mca/ras/base/ras_private.h @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +/** @file: + */ + +#ifndef ORTE_RAS_PRIVATE_H +#define ORTE_RAS_PRIVATE_H + +/* + * includes + */ +#include "orte_config.h" +#include "orte/orte_constants.h" + +#include "orte/dss/dss_types.h" +#include "orte/mca/ns/ns_types.h" +#include "orte/mca/rml/rml_types.h" +#include "orte/mca/ras/ras_types.h" + +#include "orte/mca/ras/ras.h" +#include "orte/mca/ras/base/base.h" + + +#if defined(c_plusplus) || defined(__cplusplus) +extern "C" { +#endif + +/* Define the RAS command flag */ +typedef uint8_t orte_ras_cmd_flag_t; +#define ORTE_RAS_CMD ORTE_UINT8 + +/* define some commands */ +#define ORTE_RAS_ALLOCATE_CMD 0x01 +#define ORTE_RAS_DEALLOCATE_CMD 0x02 + +/* + * API function definitions + */ +ORTE_DECLSPEC int orte_ras_base_allocate(orte_jobid_t job); +ORTE_DECLSPEC int orte_ras_base_deallocate(orte_jobid_t job); + +/* + * NO_OP functions + */ +int orte_ras_base_allocate_no_op(orte_jobid_t job); + +int orte_ras_base_node_insert_no_op(opal_list_t *); + +int orte_ras_base_node_query_no_op(opal_list_t *); + +int orte_ras_base_deallocate_no_op(orte_jobid_t job); + +int orte_ras_base_node_query_alloc_no_op(opal_list_t*, orte_jobid_t); + +orte_ras_node_t* orte_ras_base_node_lookup_no_op(orte_cellid_t, const char* nodename); + +/* + * Internal support functions + */ +ORTE_DECLSPEC int orte_ras_base_allocate_nodes(orte_jobid_t jobid, + opal_list_t* nodes); + +/* + * Query the registry for all available nodes + */ +int orte_ras_base_node_query(opal_list_t*); + +/* + * Query the registry for a specific node + */ +orte_ras_node_t* orte_ras_base_node_lookup(orte_cellid_t, const char* nodename); + +/** + * Query the registry for all nodes allocated to a specific job + */ +int orte_ras_base_node_query_alloc(opal_list_t*, orte_jobid_t); + +/** + * Add the specified node definitions to the registry + */ +int orte_ras_base_node_insert(opal_list_t*); + +/** + * Delete the specified nodes from the registry 
+ */ +int orte_ras_base_node_delete(opal_list_t*); + +/** + * Assign the allocated slots on the specified nodes to the + * indicated jobid. + */ +int orte_ras_base_node_assign(opal_list_t*, orte_jobid_t); + +/** + * Check to see if the node segment is empty + */ +int orte_ras_base_node_segment_empty(bool *empty); + + +/* + * oob interface + */ +int orte_ras_base_comm_start(void); + +void orte_ras_base_recv(int status, orte_process_name_t* sender, + orte_buffer_t* buffer, orte_rml_tag_t tag, void* cbdata); + +/** Local data type functions */ +int orte_ras_base_copy_node(orte_ras_node_t **dest, orte_ras_node_t *src, orte_data_type_t type); +int orte_ras_base_compare_node(orte_ras_node_t *value1, orte_ras_node_t *value2, orte_data_type_t type); +int orte_ras_base_pack_node(orte_buffer_t *buffer, void *src, + orte_std_cntr_t num_vals, orte_data_type_t type); +int orte_ras_base_print_node(char **output, char *prefix, orte_ras_node_t *src, orte_data_type_t type); +void orte_ras_base_std_obj_release(orte_data_value_t *value); +int orte_ras_base_size_node(size_t *size, orte_ras_node_t *src, orte_data_type_t type); +int orte_ras_base_unpack_node(orte_buffer_t *buffer, void *dest, + orte_std_cntr_t *num_vals, orte_data_type_t type); + + + +#if defined(c_plusplus) || defined(__cplusplus) +} +#endif +#endif diff --git a/orte/mca/ras/bjs/ras_bjs.c b/orte/mca/ras/bjs/ras_bjs.c index ee033bf84d..512871fad3 100644 --- a/orte/mca/ras/bjs/ras_bjs.c +++ b/orte/mca/ras/bjs/ras_bjs.c @@ -23,12 +23,14 @@ #include "orte/orte_constants.h" #include "orte/orte_types.h" + #include "opal/util/argv.h" #include "opal/util/output.h" -#include "orte/mca/rmgr/base/base.h" -#include "orte/mca/ras/base/base.h" -#include "orte/mca/ras/base/ras_base_node.h" + +#include "orte/mca/rmgr/rmgr.h" #include "orte/mca/errmgr/errmgr.h" + +#include "orte/mca/ras/base/ras_private.h" #include "ras_bjs.h" @@ -164,8 +166,8 @@ static int orte_ras_bjs_discover( /* parse the node list and check node status/access */ 
nodes = getenv("NODES"); - if(NULL == nodes) { - return opal_list_get_size(nodelist) ? ORTE_SUCCESS : ORTE_ERR_NOT_AVAILABLE; + if (NULL == nodes) { + return ORTE_ERR_NOT_AVAILABLE; } OBJ_CONSTRUCT(&new_nodes, opal_list_t); @@ -242,7 +244,7 @@ static int orte_ras_bjs_allocate(orte_jobid_t jobid) orte_app_context_t **context = NULL; orte_std_cntr_t i, num_context; - rc = orte_rmgr_base_get_app_context(jobid, &context, &num_context); + rc = orte_rmgr.get_app_context(jobid, &context, &num_context); if(ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return rc; @@ -273,16 +275,6 @@ cleanup: return rc; } -static int orte_ras_bjs_node_insert(opal_list_t *nodes) -{ - return orte_ras_base_node_insert(nodes); -} - -static int orte_ras_bjs_node_query(opal_list_t *nodes) -{ - return orte_ras_base_node_query(nodes); -} - static int orte_ras_bjs_deallocate(orte_jobid_t jobid) { return ORTE_SUCCESS; @@ -297,8 +289,10 @@ static int orte_ras_bjs_finalize(void) orte_ras_base_module_t orte_ras_bjs_module = { orte_ras_bjs_allocate, - orte_ras_bjs_node_insert, - orte_ras_bjs_node_query, + orte_ras_base_node_insert, + orte_ras_base_node_query, + orte_ras_base_node_query_alloc, + orte_ras_base_node_lookup, orte_ras_bjs_deallocate, orte_ras_bjs_finalize }; diff --git a/orte/mca/ras/bjs/ras_bjs_component.c b/orte/mca/ras/bjs/ras_bjs_component.c index 7b0991c0ce..5bd7cb002f 100644 --- a/orte/mca/ras/bjs/ras_bjs_component.c +++ b/orte/mca/ras/bjs/ras_bjs_component.c @@ -39,10 +39,10 @@ orte_ras_bjs_component_t mca_ras_bjs_component = { information about the component itself */ { - /* Indicate that we are a iof v1.0.0 component (which also + /* Indicate that we are a ras v1.3.0 component (which also implies a specific MCA version) */ - ORTE_RAS_BASE_VERSION_1_0_0, + ORTE_RAS_BASE_VERSION_1_3_0, "bjs", /* MCA component name */ ORTE_MAJOR_VERSION, /* MCA component major version */ @@ -105,6 +105,11 @@ static int orte_ras_bjs_open(void) static orte_ras_base_module_t *orte_ras_bjs_init(int* priority) { 
+ /* if we are not an HNP, then we must not be selected */ + if (!orte_process_info.seed) { + return NULL; + } + #if 0 if(getenv("NODES") == NULL) { return NULL; diff --git a/orte/mca/ras/dash_host/ras_dash_host_component.c b/orte/mca/ras/dash_host/ras_dash_host_component.c index 418bc7bd84..afa710ffdb 100644 --- a/orte/mca/ras/dash_host/ras_dash_host_component.c +++ b/orte/mca/ras/dash_host/ras_dash_host_component.c @@ -38,10 +38,10 @@ orte_ras_dash_host_component_t mca_ras_dash_host_component = { information about the component itself */ { - /* Indicate that we are a iof v1.0.0 component (which also + /* Indicate that we are a ras v1.3.0 component (which also implies a specific MCA version) */ - ORTE_RAS_BASE_VERSION_1_0_0, + ORTE_RAS_BASE_VERSION_1_3_0, "dash_host", /* MCA component name */ ORTE_MAJOR_VERSION, /* MCA component major version */ diff --git a/orte/mca/ras/dash_host/ras_dash_host_module.c b/orte/mca/ras/dash_host/ras_dash_host_module.c index 95cffec164..1d5091ac04 100644 --- a/orte/mca/ras/dash_host/ras_dash_host_module.c +++ b/orte/mca/ras/dash_host/ras_dash_host_module.c @@ -17,17 +17,17 @@ */ #include "orte_config.h" +#include "orte/orte_constants.h" +#include "orte/orte_types.h" #include "opal/util/output.h" #include "opal/util/argv.h" -#include "orte/orte_constants.h" -#include "orte/orte_types.h" -#include "orte/mca/ras/base/base.h" -#include "orte/mca/ras/base/ras_base_node.h" -#include "orte/mca/rmgr/base/base.h" -#include "orte/mca/ras/base/ras_base_node.h" -#include "orte/mca/errmgr/errmgr.h" +#include "orte/mca/rmgr/rmgr.h" +#include "orte/mca/errmgr/errmgr.h" +#include "orte/util/proc_info.h" + +#include "orte/mca/ras/base/ras_private.h" #include "orte/mca/ras/dash_host/ras_dash_host.h" @@ -46,6 +46,8 @@ orte_ras_base_module_t orte_ras_dash_host_module = { orte_ras_dash_host_allocate, orte_ras_base_node_insert, orte_ras_base_node_query, + orte_ras_base_node_query_alloc, + orte_ras_base_node_lookup, orte_ras_dash_host_deallocate, 
orte_ras_dash_host_finalize }; @@ -53,6 +55,11 @@ orte_ras_base_module_t orte_ras_dash_host_module = { orte_ras_base_module_t *orte_ras_dash_host_init(int* priority) { + /* if we are not an HNP, then we must not be selected */ + if (!orte_process_info.seed) { + return NULL; + } + *priority = mca_ras_dash_host_component.priority; return &orte_ras_dash_host_module; } @@ -83,7 +90,7 @@ static int orte_ras_dash_host_allocate(orte_jobid_t jobid) /* Otherwise, get the context */ - rc = orte_rmgr_base_get_app_context(jobid, &context, &num_context); + rc = orte_rmgr.get_app_context(jobid, &context, &num_context); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return rc; diff --git a/orte/mca/ras/gridengine/ras_gridengine_component.c b/orte/mca/ras/gridengine/ras_gridengine_component.c index ce68cc26da..2cb865df07 100644 --- a/orte/mca/ras/gridengine/ras_gridengine_component.c +++ b/orte/mca/ras/gridengine/ras_gridengine_component.c @@ -45,10 +45,10 @@ orte_ras_gridengine_component_t mca_ras_gridengine_component = { information about the component itself */ { - /* Indicate that we are a ras v1.0.0 component (which also + /* Indicate that we are a ras v1.3.0 component (which also implies a specific MCA version) */ - ORTE_RAS_BASE_VERSION_1_0_0, + ORTE_RAS_BASE_VERSION_1_3_0, "gridengine", /* MCA component name */ ORTE_MAJOR_VERSION, /* MCA component major version */ ORTE_MINOR_VERSION, /* MCA component minor version */ @@ -98,6 +98,11 @@ static int orte_ras_gridengine_open(void) static orte_ras_base_module_t *orte_ras_gridengine_init(int* priority) { + /* if we are not an HNP, then we must not be selected */ + if (!orte_process_info.seed) { + return NULL; + } + *priority = mca_ras_gridengine_component.priority; if (NULL != getenv("SGE_ROOT") && NULL != getenv("ARC") && diff --git a/orte/mca/ras/gridengine/ras_gridengine_module.c b/orte/mca/ras/gridengine/ras_gridengine_module.c index 89c5bf64d3..9418effd70 100644 --- a/orte/mca/ras/gridengine/ras_gridengine_module.c +++ 
b/orte/mca/ras/gridengine/ras_gridengine_module.c @@ -21,31 +21,31 @@ * @file: * Resource Allocation for Grid Engine */ +#include "orte_config.h" +#include "orte/orte_constants.h" #include #include #include + #include "opal/util/argv.h" #include "opal/util/output.h" #include "opal/util/show_help.h" -#include "orte_config.h" -#include "orte/orte_constants.h" -#include "orte/mca/rmgr/base/base.h" -#include "orte/mca/ras/gridengine/ras_gridengine.h" -#include "orte/mca/ras/base/base.h" -#include "orte/mca/ras/base/ras_base_node.h" + +#include "orte/mca/rmgr/rmgr.h" #include "orte/mca/errmgr/errmgr.h" #include "orte/mca/ns/ns.h" #include "orte/mca/gpr/gpr.h" +#include "orte/mca/ras/base/ras_private.h" +#include "orte/mca/ras/gridengine/ras_gridengine.h" + /* * Local functions */ static int orte_ras_gridengine_allocate(orte_jobid_t jobid); static int orte_ras_gridengine_discover(opal_list_t* nodelist, orte_app_context_t** context, orte_std_cntr_t num_context); -static int orte_ras_gridengine_node_insert(opal_list_t* nodes); -static int orte_ras_gridengine_node_query(opal_list_t* nodes); static int orte_ras_gridengine_deallocate(orte_jobid_t jobid); static int orte_ras_gridengine_finalize(void); static int get_slot_count(char* node_name, int* slot_cnt); @@ -57,8 +57,10 @@ static int get_slot_keyval(orte_ras_node_t* node, int* slot_cnt); */ orte_ras_base_module_t orte_ras_gridengine_module = { orte_ras_gridengine_allocate, - orte_ras_gridengine_node_insert, - orte_ras_gridengine_node_query, + orte_ras_base_node_insert, + orte_ras_base_node_query, + orte_ras_base_node_query_alloc, + orte_ras_base_node_lookup, orte_ras_gridengine_deallocate, orte_ras_gridengine_finalize }; @@ -77,7 +79,7 @@ static int orte_ras_gridengine_allocate(orte_jobid_t jobid) orte_std_cntr_t i, num_context; /* get the context */ - rc = orte_rmgr_base_get_app_context(jobid, &context, &num_context); + rc = orte_rmgr.get_app_context(jobid, &context, &num_context); if(ORTE_SUCCESS != rc) { 
ORTE_ERROR_LOG(rc); return rc; @@ -139,7 +141,7 @@ static int orte_ras_gridengine_discover(opal_list_t* nodelist, } /* query the nodelist from the registry */ - if(ORTE_SUCCESS != (rc = orte_ras_gridengine_node_query(nodelist))) { + if(ORTE_SUCCESS != (rc = orte_ras_base_node_query(nodelist))) { ORTE_ERROR_LOG(rc); goto cleanup; } @@ -214,7 +216,7 @@ static int orte_ras_gridengine_discover(opal_list_t* nodelist, if(opal_list_get_size(&new_nodes)) { opal_output(mca_ras_gridengine_component.verbose, "ras:gridengine: adding new nodes to the registry"); - rc = orte_ras_gridengine_node_insert(&new_nodes); + rc = orte_ras_base_node_insert(&new_nodes); if(ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); } @@ -427,22 +429,6 @@ static int get_slot_count(char* node_name, int* slot_cnt) return ORTE_ERROR; } -/** - * call the base class to insert nodes - */ -static int orte_ras_gridengine_node_insert(opal_list_t *nodes) -{ - return orte_ras_base_node_insert(nodes); -} - -/** - * call the base class to query nodes - */ -static int orte_ras_gridengine_node_query(opal_list_t *nodes) -{ - return orte_ras_base_node_query(nodes); -} - /** * call the base class to deallocate nodes */ diff --git a/orte/mca/ras/hostfile/ras_hostfile_component.c b/orte/mca/ras/hostfile/ras_hostfile_component.c index 56325251a9..2706e067e9 100644 --- a/orte/mca/ras/hostfile/ras_hostfile_component.c +++ b/orte/mca/ras/hostfile/ras_hostfile_component.c @@ -38,10 +38,10 @@ orte_ras_hostfile_component_t mca_ras_hostfile_component = { information about the component itself */ { - /* Indicate that we are a iof v1.0.0 component (which also + /* Indicate that we are a ras v1.3.0 component (which also implies a specific MCA version) */ - ORTE_RAS_BASE_VERSION_1_0_0, + ORTE_RAS_BASE_VERSION_1_3_0, "hostfile", /* MCA component name */ ORTE_MAJOR_VERSION, /* MCA component major version */ diff --git a/orte/mca/ras/hostfile/ras_hostfile_module.c b/orte/mca/ras/hostfile/ras_hostfile_module.c index 933c4deef0..729d97b395 
100644 --- a/orte/mca/ras/hostfile/ras_hostfile_module.c +++ b/orte/mca/ras/hostfile/ras_hostfile_module.c @@ -22,11 +22,10 @@ #include "opal/util/argv.h" #include "orte/orte_constants.h" #include "orte/orte_types.h" -#include "orte/mca/ras/base/base.h" -#include "orte/mca/ras/base/ras_base_node.h" +#include "orte/mca/ras/base/ras_private.h" #include "orte/mca/rmgr/base/base.h" -#include "orte/mca/ras/base/ras_base_node.h" #include "orte/mca/errmgr/errmgr.h" +#include "orte/util/proc_info.h" #include "orte/mca/ras/hostfile/ras_hostfile.h" @@ -46,6 +45,8 @@ orte_ras_base_module_t orte_ras_hostfile_module = { orte_ras_hostfile_allocate, orte_ras_base_node_insert, orte_ras_base_node_query, + orte_ras_base_node_query_alloc, + orte_ras_base_node_lookup, orte_ras_hostfile_deallocate, orte_ras_hostfile_finalize }; @@ -53,6 +54,11 @@ orte_ras_base_module_t orte_ras_hostfile_module = { orte_ras_base_module_t *orte_ras_hostfile_init(int* priority) { + /* if we are not an HNP, then we must not be selected */ + if (!orte_process_info.seed) { + return NULL; + } + *priority = mca_ras_hostfile_component.priority; return &orte_ras_hostfile_module; } diff --git a/orte/mca/ras/localhost/ras_localhost_component.c b/orte/mca/ras/localhost/ras_localhost_component.c index 43e884f095..e09f1636de 100644 --- a/orte/mca/ras/localhost/ras_localhost_component.c +++ b/orte/mca/ras/localhost/ras_localhost_component.c @@ -38,10 +38,10 @@ orte_ras_localhost_component_t mca_ras_localhost_component = { information about the component itself */ { - /* Indicate that we are a iof v1.0.0 component (which also + /* Indicate that we are a ras v1.3.0 component (which also implies a specific MCA version) */ - ORTE_RAS_BASE_VERSION_1_0_0, + ORTE_RAS_BASE_VERSION_1_3_0, "localhost", /* MCA component name */ ORTE_MAJOR_VERSION, /* MCA component major version */ diff --git a/orte/mca/ras/localhost/ras_localhost_module.c b/orte/mca/ras/localhost/ras_localhost_module.c index 43264fb404..3c5c1ae9f1 100644 --- 
a/orte/mca/ras/localhost/ras_localhost_module.c +++ b/orte/mca/ras/localhost/ras_localhost_module.c @@ -24,11 +24,10 @@ #include "opal/util/output.h" #include "orte/util/sys_info.h" -#include "orte/mca/ras/base/base.h" -#include "orte/mca/ras/base/ras_base_node.h" +#include "orte/mca/ras/base/ras_private.h" #include "orte/mca/rmgr/base/base.h" -#include "orte/mca/ras/base/ras_base_node.h" #include "orte/mca/errmgr/errmgr.h" +#include "orte/util/proc_info.h" #include "orte/mca/ras/localhost/ras_localhost.h" @@ -48,6 +47,8 @@ orte_ras_base_module_t orte_ras_localhost_module = { orte_ras_localhost_allocate, orte_ras_base_node_insert, orte_ras_base_node_query, + orte_ras_base_node_query_alloc, + orte_ras_base_node_lookup, orte_ras_localhost_deallocate, orte_ras_localhost_finalize }; @@ -55,6 +56,11 @@ orte_ras_base_module_t orte_ras_localhost_module = { orte_ras_base_module_t *orte_ras_localhost_init(int* priority) { + /* if we are not an HNP, then we must not be selected */ + if (!orte_process_info.seed) { + return NULL; + } + *priority = mca_ras_localhost_component.priority; return &orte_ras_localhost_module; } diff --git a/orte/mca/ras/lsf_bproc/ras_lsf_bproc.c b/orte/mca/ras/lsf_bproc/ras_lsf_bproc.c index 2d8a3ca86b..1621cbe015 100644 --- a/orte/mca/ras/lsf_bproc/ras_lsf_bproc.c +++ b/orte/mca/ras/lsf_bproc/ras_lsf_bproc.c @@ -21,6 +21,8 @@ #include #include "orte/orte_constants.h" + +#include "orte/mca/ras/base/ras_private.h" #include "ras_lsf_bproc.h" @@ -29,16 +31,6 @@ static int orte_ras_lsf_bproc_allocate(orte_jobid_t jobid) return ORTE_SUCCESS; } -static int orte_ras_lsf_bproc_node_insert(opal_list_t *nodes) -{ - return ORTE_ERROR; -} - -static int orte_ras_lsf_bproc_node_query(opal_list_t *nodes) -{ - return ORTE_ERROR; -} - static int orte_ras_lsf_bproc_deallocate(orte_jobid_t jobid) { return ORTE_SUCCESS; @@ -53,8 +45,10 @@ static int orte_ras_lsf_bproc_finalize(void) orte_ras_base_module_t orte_ras_lsf_bproc_module = { orte_ras_lsf_bproc_allocate, - 
orte_ras_lsf_bproc_node_insert, - orte_ras_lsf_bproc_node_query, + orte_ras_base_node_insert, + orte_ras_base_node_query, + orte_ras_base_node_query_alloc, + orte_ras_base_node_lookup, orte_ras_lsf_bproc_deallocate, orte_ras_lsf_bproc_finalize }; diff --git a/orte/mca/ras/lsf_bproc/ras_lsf_bproc_component.c b/orte/mca/ras/lsf_bproc/ras_lsf_bproc_component.c index ba9e66023f..475d2023fa 100644 --- a/orte/mca/ras/lsf_bproc/ras_lsf_bproc_component.c +++ b/orte/mca/ras/lsf_bproc/ras_lsf_bproc_component.c @@ -39,10 +39,10 @@ orte_ras_lsf_bproc_component_t mca_ras_lsf_bproc_component = { information about the component itself */ { - /* Indicate that we are a iof v1.0.0 component (which also + /* Indicate that we are a ras v1.3.0 component (which also implies a specific MCA version) */ - ORTE_RAS_BASE_VERSION_1_0_0, + ORTE_RAS_BASE_VERSION_1_3_0, "lsf_bproc", /* MCA component name */ ORTE_MAJOR_VERSION, /* MCA component major version */ @@ -90,6 +90,11 @@ static int orte_ras_lsf_bproc_open(void) static orte_ras_base_module_t *orte_ras_lsf_bproc_init(int* priority) { + /* if we are not an HNP, then we must not be selected */ + if (!orte_process_info.seed) { + return NULL; + } + *priority = mca_ras_lsf_bproc_component.priority; return NULL; } diff --git a/orte/mca/ras/poe/ras_poe_component.c b/orte/mca/ras/poe/ras_poe_component.c index 9ec0462fae..3ec0e70726 100644 --- a/orte/mca/ras/poe/ras_poe_component.c +++ b/orte/mca/ras/poe/ras_poe_component.c @@ -39,10 +39,10 @@ orte_ras_poe_component_t mca_ras_poe_component = { information about the component itself */ { - /* Indicate that we are a iof v1.0.0 component (which also + /* Indicate that we are a ras v1.3.0 component (which also implies a specific MCA version) */ - ORTE_RAS_BASE_VERSION_1_0_0, + ORTE_RAS_BASE_VERSION_1_3_0, "poe", /* MCA component name */ ORTE_MAJOR_VERSION, /* MCA component major version */ ORTE_MINOR_VERSION, /* MCA component minor version */ @@ -81,6 +81,11 @@ static int orte_ras_poe_open(void) static 
orte_ras_base_module_t *orte_ras_poe_init(int* priority) { + /* if we are not an HNP, then we must not be selected */ + if (!orte_process_info.seed) { + return NULL; + } + *priority = mca_ras_poe_component.priority; if ( NULL != getenv("LOADL_PID") ) { diff --git a/orte/mca/ras/poe/ras_poe_module.c b/orte/mca/ras/poe/ras_poe_module.c index 9d791189b0..ce4ef95d99 100644 --- a/orte/mca/ras/poe/ras_poe_module.c +++ b/orte/mca/ras/poe/ras_poe_module.c @@ -23,8 +23,7 @@ #include "opal/util/argv.h" #include "orte/orte_constants.h" #include "orte/mca/ras/poe/ras_poe.h" -#include "orte/mca/ras/base/base.h" -#include "orte/mca/ras/base/ras_base_node.h" +#include "orte/mca/ras/base/ras_private.h" static int orte_ras_poe_allocate(orte_jobid_t jobid) { @@ -71,16 +70,6 @@ static int orte_ras_poe_allocate(orte_jobid_t jobid) return ret; } -static int orte_ras_poe_node_insert(opal_list_t *nodes) -{ - return orte_ras_base_node_insert(nodes); -} - -static int orte_ras_poe_node_query(opal_list_t *nodes) -{ - return orte_ras_base_node_insert(nodes); -} - static int orte_ras_poe_deallocate(orte_jobid_t jobid) { return ORTE_SUCCESS; @@ -93,8 +82,10 @@ static int orte_ras_poe_finalize(void) orte_ras_base_module_t orte_ras_poe_module = { orte_ras_poe_allocate, - orte_ras_poe_node_insert, - orte_ras_poe_node_query, + orte_ras_base_node_insert, + orte_ras_base_node_query, + orte_ras_base_node_query_alloc, + orte_ras_base_node_lookup, orte_ras_poe_deallocate, orte_ras_poe_finalize }; diff --git a/orte/mca/ras/proxy/Makefile.am b/orte/mca/ras/proxy/Makefile.am new file mode 100644 index 0000000000..fc12a4a0ed --- /dev/null +++ b/orte/mca/ras/proxy/Makefile.am @@ -0,0 +1,51 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. 
+# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# Use the top-level Makefile.options + + + +# Make the output library in this directory, and name it either +# mca__.la (for DSO builds) or libmca__.la +# (for static builds). + +if OMPI_BUILD_ras_proxy_DSO +component_noinst = +component_install = mca_ras_proxy.la +else +component_noinst = libmca_ras_proxy.la +component_install = +endif + +proxy_SOURCES = \ + ras_proxy.c \ + ras_proxy.h \ + ras_proxy_component.c + +mcacomponentdir = $(libdir)/openmpi +mcacomponent_LTLIBRARIES = $(component_install) +mca_ras_proxy_la_SOURCES = $(proxy_SOURCES) +mca_ras_proxy_la_LIBADD = \ + $(top_ompi_builddir)/orte/liborte.la \ + $(top_ompi_builddir)/opal/libopal.la +mca_ras_proxy_la_LDFLAGS = -module -avoid-version + +noinst_LTLIBRARIES = $(component_noinst) +libmca_ras_proxy_la_SOURCES = $(proxy_SOURCES) +libmca_ras_proxy_la_LIBADD = +libmca_ras_proxy_la_LDFLAGS = -module -avoid-version diff --git a/orte/mca/ras/proxy/configure.params b/orte/mca/ras/proxy/configure.params new file mode 100644 index 0000000000..deee7be837 --- /dev/null +++ b/orte/mca/ras/proxy/configure.params @@ -0,0 +1,23 @@ +# -*- shell-script -*- +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# Specific to this module + +PARAM_INIT_FILE=ras_proxy.c +PARAM_CONFIG_FILES="Makefile" diff --git a/orte/mca/ras/proxy/ras_proxy.c b/orte/mca/ras/proxy/ras_proxy.c new file mode 100644 index 0000000000..3014711a0b --- /dev/null +++ b/orte/mca/ras/proxy/ras_proxy.c @@ -0,0 +1,170 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +/** @file: + * RAS proxy functions: forward allocate/deallocate requests for a job + * to the process named by orte_ras_proxy_replica and check its reply. + */ + +#include "orte_config.h" + +#include + +#include "orte/orte_constants.h" +#include "orte/orte_types.h" + +#include "orte/dss/dss.h" +#include "orte/mca/errmgr/errmgr.h" +#include "orte/mca/rml/rml.h" + +#include "orte/mca/ras/base/ras_private.h" +#include "ras_proxy.h" + +/** + * globals + */ + +/* + * functions + */ +/** + * Send an allocate request for a job to the replica. + * + * Packs ORTE_RAS_ALLOCATE_CMD followed by the jobid, sends the buffer to + * orte_ras_proxy_replica on ORTE_RML_TAG_RAS, then receives a reply on the + * same tag and checks that it echoes ORTE_RAS_ALLOCATE_CMD. Nothing beyond + * the command flag is unpacked from the reply. + * + * @param job jobid placed in the request + * @return ORTE_SUCCESS if the reply echoes the command; + * ORTE_ERR_OUT_OF_RESOURCE if a buffer cannot be created; + * ORTE_ERR_COMM_FAILURE if send_buffer or recv_buffer returns a + * negative value or the reply carries a different command; + * otherwise the error code from orte_dss.pack / orte_dss.unpack + */ +int orte_ras_proxy_allocate(orte_jobid_t job) +{ + orte_buffer_t* cmd; + orte_buffer_t* answer; + orte_ras_cmd_flag_t command; + orte_std_cntr_t count; + int rc; + + command = ORTE_RAS_ALLOCATE_CMD; + + cmd = OBJ_NEW(orte_buffer_t); + if (cmd == NULL) { + ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); + return ORTE_ERR_OUT_OF_RESOURCE; + } + /* request layout: command flag first, then the jobid */ + if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &command, 1, ORTE_RAS_CMD))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(cmd); + return rc; + } + + if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &job, 1, ORTE_JOBID))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(cmd); + return rc; + } + + if (0 > orte_rml.send_buffer(orte_ras_proxy_replica, cmd, ORTE_RML_TAG_RAS, 0)) { +
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); + OBJ_RELEASE(cmd); + return ORTE_ERR_COMM_FAILURE; + } + OBJ_RELEASE(cmd); + + answer = OBJ_NEW(orte_buffer_t); + if(answer == NULL) { + ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); + return ORTE_ERR_OUT_OF_RESOURCE; + } + + if (0 > orte_rml.recv_buffer(orte_ras_proxy_replica, answer, ORTE_RML_TAG_RAS)) { + ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); + OBJ_RELEASE(answer); + return ORTE_ERR_COMM_FAILURE; + } + + count = 1; + if (ORTE_SUCCESS != (rc = orte_dss.unpack(answer, &command, &count, ORTE_RAS_CMD))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(answer); + return rc; + } + /* the reply must echo the command that was sent */ + if (ORTE_RAS_ALLOCATE_CMD != command) { + ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); + OBJ_RELEASE(answer); + return ORTE_ERR_COMM_FAILURE; + } + + OBJ_RELEASE(answer); + return ORTE_SUCCESS; +} +/** + * Send a deallocate request for a job to the replica. + * + * Same exchange as orte_ras_proxy_allocate, but the command flag that is + * packed, and that the reply is required to echo, is + * ORTE_RAS_DEALLOCATE_CMD. + * + * @param job jobid placed in the request + * @return ORTE_SUCCESS if the reply echoes the command; + * ORTE_ERR_OUT_OF_RESOURCE if a buffer cannot be created; + * ORTE_ERR_COMM_FAILURE if send_buffer or recv_buffer returns a + * negative value or the reply carries a different command; + * otherwise the error code from orte_dss.pack / orte_dss.unpack + */ +int orte_ras_proxy_deallocate(orte_jobid_t job) +{ + orte_buffer_t* cmd; + orte_buffer_t* answer; + orte_ras_cmd_flag_t command; + orte_std_cntr_t count; + int rc; + + command = ORTE_RAS_DEALLOCATE_CMD; + + cmd = OBJ_NEW(orte_buffer_t); + if (cmd == NULL) { + ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); + return ORTE_ERR_OUT_OF_RESOURCE; + } + /* request layout: command flag first, then the jobid */ + if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &command, 1, ORTE_RAS_CMD))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(cmd); + return rc; + } + + if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &job, 1, ORTE_JOBID))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(cmd); + return rc; + } + + if (0 > orte_rml.send_buffer(orte_ras_proxy_replica, cmd, ORTE_RML_TAG_RAS, 0)) { + ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); + OBJ_RELEASE(cmd); + return ORTE_ERR_COMM_FAILURE; + } + OBJ_RELEASE(cmd); + + answer = OBJ_NEW(orte_buffer_t); + if(answer == NULL) { + ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); + return ORTE_ERR_OUT_OF_RESOURCE; + } + + if (0 > orte_rml.recv_buffer(orte_ras_proxy_replica, answer, ORTE_RML_TAG_RAS)) { + ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); + OBJ_RELEASE(answer); + return ORTE_ERR_COMM_FAILURE; + } + + count = 1; + if (ORTE_SUCCESS != (rc =
orte_dss.unpack(answer, &command, &count, ORTE_RAS_CMD))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(answer); + return rc; + } + /* the reply must echo the command that was sent */ + if (ORTE_RAS_DEALLOCATE_CMD != command) { + ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); + OBJ_RELEASE(answer); + return ORTE_ERR_COMM_FAILURE; + } + + OBJ_RELEASE(answer); + return ORTE_SUCCESS; +} diff --git a/orte/mca/ras/proxy/ras_proxy.h b/orte/mca/ras/proxy/ras_proxy.h new file mode 100644 index 0000000000..bb2dcc66c9 --- /dev/null +++ b/orte/mca/ras/proxy/ras_proxy.h @@ -0,0 +1,58 @@ +/* -*- C -*- + * + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved.
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + */ +#ifndef RAS_PROXY_H +#define RAS_PROXY_H + +#include "orte_config.h" + +#include "orte/mca/ras/ras.h" + +#if defined(c_plusplus) || defined(__cplusplus) +extern "C" { +#endif + +/* process this proxy sends its RAS requests to; assigned in orte_ras_proxy_init */ +extern orte_process_name_t *orte_ras_proxy_replica; + +/* + * Module open / close + */ +int orte_ras_proxy_open(void); +int orte_ras_proxy_close(void); + + +/* + * Startup / Shutdown + * + * orte_ras_proxy_init returns NULL when orte_process_info.seed is set; + * otherwise it returns the proxy module and sets *priority to 1. + */ +orte_ras_base_module_t* orte_ras_proxy_init(int* priority); +int orte_ras_proxy_finalize(void); + +/* + * proxy function prototypes + * + * Each sends a request for the given job to orte_ras_proxy_replica and + * returns an ORTE status code. + */ +int orte_ras_proxy_allocate(orte_jobid_t job); +int orte_ras_proxy_deallocate(orte_jobid_t job); + + +#if defined(c_plusplus) || defined(__cplusplus) +} +#endif + +#endif diff --git a/orte/mca/ras/proxy/ras_proxy_component.c b/orte/mca/ras/proxy/ras_proxy_component.c new file mode 100644 index 0000000000..ed57353605 --- /dev/null +++ b/orte/mca/ras/proxy/ras_proxy_component.c @@ -0,0 +1,123 @@ +/* -*- C -*- + * + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +/** @file: + * RAS proxy component: component and module structures plus the + * open / close / init / finalize functions. + */ + +/* + * includes + */ +#include "orte_config.h" + +#include "orte/orte_constants.h" +#include "orte/util/proc_info.h" +#include "opal/util/output.h" + +#include "opal/mca/mca.h" +#include "opal/mca/base/mca_base_param.h" + +#include "orte/mca/ras/base/ras_private.h" +#include "ras_proxy.h" + +/* + * Component structure: MCA version information, the open/close functions, + * and the init function that returns the module + */ +orte_ras_base_component_t mca_ras_proxy_component = { + { + ORTE_RAS_BASE_VERSION_1_3_0, + + "proxy", /* MCA component name */ + ORTE_MAJOR_VERSION, /* MCA component major version */ + ORTE_MINOR_VERSION, /* MCA component minor version */ + ORTE_RELEASE_VERSION, /* MCA component release version */ + orte_ras_proxy_open, /* component open */ + orte_ras_proxy_close /* component close */ + }, + { + false /* checkpoint / restart */ + }, + orte_ras_proxy_init /* component init (ras_init) */ +}; + +/* + * setup the function pointers for the module: allocate and deallocate are + * the proxy versions, the node functions are the base implementations + */ +static orte_ras_base_module_t orte_ras_proxy_module = { + orte_ras_proxy_allocate, /* allocate_job */ + orte_ras_base_node_insert, /* node_insert */ + orte_ras_base_node_query, /* node_query */ + orte_ras_base_node_query_alloc, /* node_query_alloc */ + orte_ras_base_node_lookup, /* node_lookup */ + orte_ras_proxy_deallocate, /* deallocate_job */ + orte_ras_proxy_finalize /* finalize */ +}; + +/* + * Whether or not we allowed this component to be selected + * (set in orte_ras_proxy_init, cleared in orte_ras_proxy_finalize) + */ +static bool initialized = false; + +/* the name of our replica (assigned from orte_process_info.ns_replica in orte_ras_proxy_init) */ +orte_process_name_t *orte_ras_proxy_replica; + +/* + * Not much to do here. 
+ */ +int orte_ras_proxy_open(void) +{ + return ORTE_SUCCESS; +} + +/* + * Nothing to do on close either; always returns ORTE_SUCCESS + */ +int orte_ras_proxy_close(void) +{ + return ORTE_SUCCESS; +} +/** + * Component init function (the ras_init entry of mca_ras_proxy_component). + * + * Returns NULL when orte_process_info.seed is set. Otherwise points + * orte_ras_proxy_replica at orte_process_info.ns_replica, sets the + * initialized flag and returns the proxy module. + * + * @param priority set to 1 when the module is returned; not written when + * NULL is returned + * @return &orte_ras_proxy_module, or NULL when orte_process_info.seed is set + */ +orte_ras_base_module_t* orte_ras_proxy_init(int* priority) +{ + /* If we are an HNP, then don't pick us */ + if (orte_process_info.seed) { + return NULL; + } + + /* define the replica for us to use - for now, just point + * to the name service replica + */ + orte_ras_proxy_replica = orte_process_info.ns_replica; + + initialized = true; + *priority = 1; + return &orte_ras_proxy_module; +} + + +/* + * finalize routine: clears the initialized flag and returns ORTE_SUCCESS + */ +int orte_ras_proxy_finalize(void) +{ + initialized = false; + + /* All done */ + + return ORTE_SUCCESS; +} diff --git a/orte/mca/ras/ras.h b/orte/mca/ras/ras.h index 0c99c0d77e..3da46a4f65 100644 --- a/orte/mca/ras/ras.h +++ b/orte/mca/ras/ras.h @@ -200,24 +200,38 @@ typedef int (*orte_ras_base_module_node_insert_fn_t)(opal_list_t *); typedef int (*orte_ras_base_module_node_query_fn_t)(opal_list_t *); /** - * ras module version 1.0.0 + * Query the registry for all nodes allocated to a specific job */ -struct orte_ras_base_module_1_0_0_t { +typedef int (*orte_ras_base_module_node_query_alloc_fn_t)(opal_list_t*, orte_jobid_t); + +/* + * Query the registry for a specific node + */ +typedef orte_ras_node_t* (*orte_ras_base_module_node_lookup_fn_t)(orte_cellid_t, const char* nodename); + +/** + * ras module version 1.3.0 + */ +struct orte_ras_base_module_1_3_0_t { /** Allocation function pointer */ - orte_ras_base_module_allocate_fn_t allocate; + orte_ras_base_module_allocate_fn_t allocate_job; /** Node Insertion function pointer */ - orte_ras_base_module_node_insert_fn_t node_insert; + orte_ras_base_module_node_insert_fn_t node_insert; /** Node Query function pointer */ - orte_ras_base_module_node_query_fn_t node_query; + orte_ras_base_module_node_query_fn_t node_query; + /* node query allocate function pointer */ + orte_ras_base_module_node_query_alloc_fn_t node_query_alloc; + /* node
lookup */ + orte_ras_base_module_node_lookup_fn_t node_lookup; /** Deallocation function pointer */ - orte_ras_base_module_deallocate_fn_t deallocate; + orte_ras_base_module_deallocate_fn_t deallocate_job; /** Finalization function pointer */ - orte_ras_base_module_finalize_fn_t finalize; + orte_ras_base_module_finalize_fn_t finalize; }; /** Convenience typedef */ -typedef struct orte_ras_base_module_1_0_0_t orte_ras_base_module_1_0_0_t; +typedef struct orte_ras_base_module_1_3_0_t orte_ras_base_module_1_3_0_t; /** Convenience typedef */ -typedef orte_ras_base_module_1_0_0_t orte_ras_base_module_t; +typedef orte_ras_base_module_1_3_0_t orte_ras_base_module_t; /* * ras component @@ -232,7 +246,7 @@ typedef orte_ras_base_module_t* (*orte_ras_base_component_init_fn_t)(int* priori /** - * ras component version 1.0.0 + * ras component version 1.3.0 */ -struct orte_ras_base_component_1_0_0_t { +struct orte_ras_base_component_1_3_0_t { /** Base MCA structure */ mca_base_component_t ras_version; /** Base MCA data */ @@ -241,19 +255,19 @@ struct orte_ras_base_component_1_0_0_t { orte_ras_base_component_init_fn_t ras_init; }; /** Convenience typedef */ -typedef struct orte_ras_base_component_1_0_0_t orte_ras_base_component_1_0_0_t; +typedef struct orte_ras_base_component_1_3_0_t orte_ras_base_component_1_3_0_t; /** Convenience typedef */ -typedef orte_ras_base_component_1_0_0_t orte_ras_base_component_t; +typedef orte_ras_base_component_1_3_0_t orte_ras_base_component_t; /** - * Macro for use in components that are of type ras v1.0.0 + * Macro for use in components that are of type ras v1.3.0 */ -#define ORTE_RAS_BASE_VERSION_1_0_0 \ - /* ras v1.0 is chained to MCA v1.0 */ \ +#define ORTE_RAS_BASE_VERSION_1_3_0 \ + /* ras v1.3 is chained to MCA v1.0 */ \ MCA_BASE_VERSION_1_0_0, \ - /* ras v1.0 */ \ - "ras", 1, 0, 0 + /* ras v1.3 */ \ + "ras", 1, 3, 0 /* * global module that holds function pointers diff --git a/orte/mca/ras/slurm/ras_slurm.h b/orte/mca/ras/slurm/ras_slurm.h index f8e3c8ffd2..a954d01192 100644 --- a/orte/mca/ras/slurm/ras_slurm.h +++
b/orte/mca/ras/slurm/ras_slurm.h @@ -30,7 +30,7 @@ extern "C" { #endif - ORTE_DECLSPEC extern orte_ras_base_component_1_0_0_t mca_ras_slurm_component; + ORTE_DECLSPEC extern orte_ras_base_component_t mca_ras_slurm_component; ORTE_DECLSPEC extern orte_ras_base_module_t orte_ras_slurm_module; #if defined(c_plusplus) || defined(__cplusplus) diff --git a/orte/mca/ras/slurm/ras_slurm_component.c b/orte/mca/ras/slurm/ras_slurm_component.c index 480db7b619..73bf46c267 100644 --- a/orte/mca/ras/slurm/ras_slurm_component.c +++ b/orte/mca/ras/slurm/ras_slurm_component.c @@ -22,6 +22,7 @@ #include "opal/mca/base/base.h" #include "opal/mca/base/mca_base_param.h" #include "orte/orte_constants.h" +#include "orte/util/proc_info.h" #include "ras_slurm.h" @@ -38,15 +39,15 @@ static int ras_slurm_open(void); static orte_ras_base_module_t *ras_slurm_init(int*); -orte_ras_base_component_1_0_0_t mca_ras_slurm_component = { +orte_ras_base_component_t mca_ras_slurm_component = { /* First, the mca_base_component_t struct containing meta information about the component itself */ { - /* Indicate that we are a iof v1.0.0 component (which also + /* Indicate that we are a ras v1.3.0 component (which also implies a specific MCA version) */ - ORTE_RAS_BASE_VERSION_1_0_0, + ORTE_RAS_BASE_VERSION_1_3_0, /* Component name and version */ @@ -85,6 +86,11 @@ static int ras_slurm_open(void) static orte_ras_base_module_t *ras_slurm_init(int* priority) { + /* if we are not an HNP, then we must not be selected */ + if (!orte_process_info.seed) { + return NULL; + } + /* Are we running under a SLURM job? 
*/ if (NULL != getenv("SLURM_JOBID")) { diff --git a/orte/mca/ras/slurm/ras_slurm_module.c b/orte/mca/ras/slurm/ras_slurm_module.c index 122d68dc74..b0e5f242a8 100644 --- a/orte/mca/ras/slurm/ras_slurm_module.c +++ b/orte/mca/ras/slurm/ras_slurm_module.c @@ -26,8 +26,7 @@ #include "opal/util/show_help.h" #include "orte/orte_constants.h" #include "orte/orte_types.h" -#include "orte/mca/ras/base/base.h" -#include "orte/mca/ras/base/ras_base_node.h" +#include "orte/mca/ras/base/ras_private.h" #include "ras_slurm.h" @@ -35,8 +34,6 @@ * Local functions */ static int allocate(orte_jobid_t jobid); -static int node_insert(opal_list_t *); -static int node_query(opal_list_t *); static int deallocate(orte_jobid_t jobid); static int finalize(void); @@ -51,8 +48,10 @@ static int parse_range(char *base, char *range, char ***nodelist); */ orte_ras_base_module_t orte_ras_slurm_module = { allocate, - node_insert, - node_query, + orte_ras_base_node_insert, + orte_ras_base_node_query, + orte_ras_base_node_query_alloc, + orte_ras_base_node_lookup, deallocate, finalize }; @@ -102,16 +101,6 @@ static int allocate(orte_jobid_t jobid) return ret; } -static int node_insert(opal_list_t *nodes) -{ - return orte_ras_base_node_insert(nodes); -} - -static int node_query(opal_list_t *nodes) -{ - return orte_ras_base_node_query(nodes); -} - /* * There's really nothing to do here */ diff --git a/orte/mca/ras/tm/ras_tm.h b/orte/mca/ras/tm/ras_tm.h index 8c798d1d43..86b70833c6 100644 --- a/orte/mca/ras/tm/ras_tm.h +++ b/orte/mca/ras/tm/ras_tm.h @@ -30,7 +30,7 @@ extern "C" { #endif - ORTE_DECLSPEC extern orte_ras_base_component_1_0_0_t mca_ras_tm_component; + ORTE_DECLSPEC extern orte_ras_base_component_t mca_ras_tm_component; ORTE_DECLSPEC extern orte_ras_base_module_t orte_ras_tm_module; #if defined(c_plusplus) || defined(__cplusplus) diff --git a/orte/mca/ras/tm/ras_tm_component.c b/orte/mca/ras/tm/ras_tm_component.c index 0039941f09..a6871b0c81 100644 --- a/orte/mca/ras/tm/ras_tm_component.c +++ 
b/orte/mca/ras/tm/ras_tm_component.c @@ -22,6 +22,7 @@ #include "opal/mca/base/mca_base_param.h" #include "opal/util/output.h" #include "orte/orte_constants.h" +#include "orte/util/proc_info.h" #include "ras_tm.h" @@ -38,15 +39,15 @@ static int ras_tm_open(void); static orte_ras_base_module_t *ras_tm_init(int*); -orte_ras_base_component_1_0_0_t mca_ras_tm_component = { +orte_ras_base_component_t mca_ras_tm_component = { /* First, the mca_base_component_t struct containing meta information about the component itself */ { - /* Indicate that we are a iof v1.0.0 component (which also + /* Indicate that we are a ras v1.3.0 component (which also implies a specific MCA version) */ - ORTE_RAS_BASE_VERSION_1_0_0, + ORTE_RAS_BASE_VERSION_1_3_0, /* Component name and version */ @@ -85,6 +86,11 @@ static int ras_tm_open(void) static orte_ras_base_module_t *ras_tm_init(int* priority) { + /* if we are not an HNP, then we must not be selected */ + if (!orte_process_info.seed) { + return NULL; + } + /* Are we running under a TM job? 
*/ if (NULL != getenv("PBS_ENVIRONMENT") && NULL != getenv("PBS_JOBID")) { diff --git a/orte/mca/ras/tm/ras_tm_module.c b/orte/mca/ras/tm/ras_tm_module.c index 4524f012f6..f2fed70042 100644 --- a/orte/mca/ras/tm/ras_tm_module.c +++ b/orte/mca/ras/tm/ras_tm_module.c @@ -28,8 +28,7 @@ #include "opal/util/output.h" #include "orte/orte_constants.h" #include "orte/orte_types.h" -#include "orte/mca/ras/base/base.h" -#include "orte/mca/ras/base/ras_base_node.h" +#include "orte/mca/ras/base/ras_private.h" #include "ras_tm.h" @@ -37,8 +36,6 @@ * Local functions */ static int allocate(orte_jobid_t jobid); -static int node_insert(opal_list_t *); -static int node_query(opal_list_t *); static int deallocate(orte_jobid_t jobid); static int finalize(void); @@ -51,8 +48,10 @@ static int get_tm_hostname(tm_node_id node, char **hostname, char **arch); */ orte_ras_base_module_t orte_ras_tm_module = { allocate, - node_insert, - node_query, + orte_ras_base_node_insert, + orte_ras_base_node_query, + orte_ras_base_node_query_alloc, + orte_ras_base_node_lookup, deallocate, finalize }; @@ -107,16 +106,6 @@ static int allocate(orte_jobid_t jobid) return ret; } -static int node_insert(opal_list_t *nodes) -{ - return orte_ras_base_node_insert(nodes); -} - -static int node_query(opal_list_t *nodes) -{ - return orte_ras_base_node_query(nodes); -} - /* * There's really nothing to do here */ diff --git a/orte/mca/ras/xgrid/src/ras_xgrid.h b/orte/mca/ras/xgrid/src/ras_xgrid.h index 3162560b49..628efdfb70 100644 --- a/orte/mca/ras/xgrid/src/ras_xgrid.h +++ b/orte/mca/ras/xgrid/src/ras_xgrid.h @@ -30,7 +30,7 @@ extern "C" { #endif - ORTE_DECLSPEC extern orte_ras_base_component_1_0_0_t mca_ras_xgrid_component; + ORTE_DECLSPEC extern orte_ras_base_component_t mca_ras_xgrid_component; ORTE_DECLSPEC extern orte_ras_base_module_t orte_ras_xgrid_module; #if defined(c_plusplus) || defined(__cplusplus) diff --git a/orte/mca/ras/xgrid/src/ras_xgrid_component.c b/orte/mca/ras/xgrid/src/ras_xgrid_component.c 
index 82b97f3de1..4c5c15d540 100644 --- a/orte/mca/ras/xgrid/src/ras_xgrid_component.c +++ b/orte/mca/ras/xgrid/src/ras_xgrid_component.c @@ -23,6 +23,7 @@ #include "opal/mca/base/mca_base_param.h" #include "ras_xgrid.h" #include "opal/util/output.h" +#include "orte/util/proc_info.h" /* * Local functions @@ -32,15 +33,15 @@ static int orte_ras_xgrid_component_close(void); static orte_ras_base_module_t *orte_ras_xgrid_init(int*); -orte_ras_base_component_1_0_0_t mca_ras_xgrid_component = { +orte_ras_base_component_t mca_ras_xgrid_component = { /* First, the mca_base_component_t struct containing meta information about the component itself */ { - /* Indicate that we are a iof v1.0.0 component (which also + /* Indicate that we are a ras v1.3.0 component (which also implies a specific MCA version) */ - ORTE_RAS_BASE_VERSION_1_0_0, + ORTE_RAS_BASE_VERSION_1_3_0, /* Component name and version */ @@ -82,6 +83,11 @@ orte_ras_xgrid_component_close(void) static orte_ras_base_module_t *orte_ras_xgrid_init(int* priority) { + /* if we are not an HNP, then we must not be selected */ + if (!orte_process_info.seed) { + return NULL; + } + /* Are we running under a xgrid job? 
*/ int id = mca_base_param_find("ras", "xgrid", "priority"); mca_base_param_lookup_int(id,priority); diff --git a/orte/mca/ras/xgrid/src/ras_xgrid_module.c b/orte/mca/ras/xgrid/src/ras_xgrid_module.c index 00d0370f04..2894fc338e 100644 --- a/orte/mca/ras/xgrid/src/ras_xgrid_module.c +++ b/orte/mca/ras/xgrid/src/ras_xgrid_module.c @@ -25,9 +25,8 @@ #include "orte/orte_types.h" #include "opal/util/argv.h" #include "opal/util/output.h" -#include "orte/mca/ras/base/base.h" -#include "orte/mca/ras/base/ras_base_node.h" -#include "orte/mca/rmgr/base/base.h" +#include "orte/mca/ras/base/ras_private.h" +#include "orte/mca/rmgr/rmgr.h" #include "ras_xgrid.h" @@ -35,8 +34,6 @@ * Local functions */ static int allocate(orte_jobid_t jobid); -static int node_insert(opal_list_t *); -static int node_query(opal_list_t *); static int deallocate(orte_jobid_t jobid); static int finalize(void); @@ -48,8 +45,10 @@ static int discover(orte_jobid_t jobid, opal_list_t* nodelist); */ orte_ras_base_module_t orte_ras_xgrid_module = { allocate, - node_insert, - node_query, + orte_ras_base_node_insert, + orte_ras_base_node_query, + orte_ras_base_node_query_alloc, + orte_ras_base_node_lookup, deallocate, finalize }; @@ -93,16 +92,6 @@ static int allocate(orte_jobid_t jobid) return ret; } -static int node_insert(opal_list_t *nodes) -{ - return orte_ras_base_node_insert(nodes); -} - -static int node_query(opal_list_t *nodes) -{ - return orte_ras_base_node_query(nodes); -} - /* * There's really nothing to do here */ @@ -137,7 +126,7 @@ static int discover(orte_jobid_t jobid, opal_list_t* nodelist) char *hostname; /* how many slots do we need? 
*/ - if(ORTE_SUCCESS != (ret = orte_rmgr_base_get_job_slots(jobid, &num_requested))) { + if(ORTE_SUCCESS != (ret = orte_rmgr.get_job_slots(jobid, &num_requested))) { return ret; } diff --git a/orte/mca/rds/base/Makefile.am b/orte/mca/rds/base/Makefile.am index 82c56ca05e..89d3a15116 100644 --- a/orte/mca/rds/base/Makefile.am +++ b/orte/mca/rds/base/Makefile.am @@ -24,4 +24,6 @@ libmca_rds_la_SOURCES += \ base/rds_base_select.c \ base/rds_base_open.c \ base/rds_base_query.c \ + base/rds_base_no_ops.c \ + base/rds_base_receive.c \ base/rds_base_registry_fns.c diff --git a/orte/mca/rds/base/base.h b/orte/mca/rds/base/base.h index 1ac5fb0aa6..d648a0fd5a 100644 --- a/orte/mca/rds/base/base.h +++ b/orte/mca/rds/base/base.h @@ -29,6 +29,8 @@ #include "opal/class/opal_list.h" #include "opal/mca/mca.h" + +#include "orte/mca/rml/rml_types.h" #include "orte/mca/rds/rds.h" @@ -58,7 +60,6 @@ ORTE_DECLSPEC int orte_rds_base_open(void); ORTE_DECLSPEC int orte_rds_base_select(void); ORTE_DECLSPEC int orte_rds_base_finalize(void); ORTE_DECLSPEC int orte_rds_base_close(void); -ORTE_DECLSPEC int orte_rds_base_query(void); /* * globals that might be needed @@ -66,21 +67,13 @@ ORTE_DECLSPEC int orte_rds_base_query(void); typedef struct orte_rds_base_t { int rds_output; + bool no_op_selected; opal_list_t rds_components; opal_list_t rds_selected; } orte_rds_base_t; ORTE_DECLSPEC extern orte_rds_base_t orte_rds_base; -/* - * external API functions will be documented in the mca/ns/ns.h file - */ - -/* - * utility functions for use within the RDS - */ -ORTE_DECLSPEC int orte_rds_base_store_resource(opal_list_t *resource_list); - #if defined(c_plusplus) || defined(__cplusplus) } #endif diff --git a/orte/mca/rds/base/rds_base_close.c b/orte/mca/rds/base/rds_base_close.c index 5a97138c8e..f74db3347b 100644 --- a/orte/mca/rds/base/rds_base_close.c +++ b/orte/mca/rds/base/rds_base_close.c @@ -31,10 +31,15 @@ int orte_rds_base_finalize(void) { opal_list_item_t* item; + /* if we are using the 
"null" component, then do nothing */ + if (orte_rds_base.no_op_selected) { + return ORTE_SUCCESS; + } + /* Finalize all selected modules */ while((item = opal_list_remove_first(&orte_rds_base.rds_selected)) != NULL) { orte_rds_base_selected_t* selected = (orte_rds_base_selected_t*)item; - selected->module->finalize(); + selected->component->rds_fini(); OBJ_RELEASE(selected); } return ORTE_SUCCESS; @@ -42,6 +47,11 @@ int orte_rds_base_finalize(void) int orte_rds_base_close(void) { + /* if we are using the "null" component, then do nothing */ + if (orte_rds_base.no_op_selected) { + return ORTE_SUCCESS; + } + /* Close all remaining available components (may be one if this is a Open RTE program, or [possibly] multiple if this is ompi_info) */ diff --git a/orte/mca/rds/base/rds_base_no_ops.c b/orte/mca/rds/base/rds_base_no_ops.c new file mode 100644 index 0000000000..711c09086c --- /dev/null +++ b/orte/mca/rds/base/rds_base_no_ops.c @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +/** @file: + */ + +/* + * includes + */ +#include "orte_config.h" +#include "orte/orte_constants.h" + +#include "opal/class/opal_list.h" + +#include "orte/mca/rds/base/rds_private.h" + +int orte_rds_base_no_op_query(void) +{ + return ORTE_ERR_NOT_SUPPORTED; +} + +int orte_rds_base_no_op_store_resource(opal_list_t *resources) +{ + return ORTE_ERR_NOT_SUPPORTED; +} diff --git a/orte/mca/rds/base/rds_base_open.c b/orte/mca/rds/base/rds_base_open.c index 9860c0ee6d..bf30a72ac4 100644 --- a/orte/mca/rds/base/rds_base_open.c +++ b/orte/mca/rds/base/rds_base_open.c @@ -23,9 +23,13 @@ #include "opal/mca/mca.h" #include "opal/mca/base/base.h" #include "opal/mca/base/mca_base_param.h" -#include "orte/mca/gpr/gpr_types.h" #include "opal/util/output.h" +#include "orte/mca/errmgr/errmgr.h" +#include "orte/mca/gpr/gpr_types.h" +#include "orte/mca/rml/rml.h" + +#include "orte/mca/rds/base/rds_private.h" #include "orte/mca/rds/base/base.h" @@ -86,7 +90,16 @@ OBJ_CLASS_INSTANCE( /* * Global variables */ -orte_rds_base_module_t orte_rds; +orte_rds_base_module_t orte_rds = { + orte_rds_base_query, + orte_rds_base_store_resource +}; + +orte_rds_base_module_t orte_rds_no_op = { + orte_rds_base_no_op_query, + orte_rds_base_no_op_store_resource +}; + orte_rds_base_t orte_rds_base; /** @@ -96,6 +109,7 @@ orte_rds_base_t orte_rds_base; int orte_rds_base_open(void) { int param, value; + char *requested; /* Debugging / verbose output */ @@ -109,8 +123,26 @@ int orte_rds_base_open(void) orte_rds_base.rds_output = -1; } + /* Some systems do not want any RDS support. In those cases, + * memory consumption is also an issue. For those systems, we + * avoid opening the RDS components by checking for a directive + * to use the "null" component. 
+ */ + param = mca_base_param_reg_string_name("rds", NULL, NULL, + false, false, NULL, &requested); + if (NULL != requested && 0 == strcmp(requested, "null")) { + /* the user has specifically requested that we use the "null" + * component. In this case, that means we do NOT open any + * components, and we simply use the default module we have + * already defined above + */ + orte_rds_base.no_op_selected = true; + orte_rds = orte_rds_no_op; /* use the no_op module */ + return ORTE_SUCCESS; + } + orte_rds_base.no_op_selected = false; + /* Open up all available components */ - if (ORTE_SUCCESS != mca_base_components_open("rds", orte_rds_base.rds_output, mca_rds_base_static_components, diff --git a/orte/mca/rds/base/rds_base_query.c b/orte/mca/rds/base/rds_base_query.c index 65e4fcb6bb..18714719f8 100644 --- a/orte/mca/rds/base/rds_base_query.c +++ b/orte/mca/rds/base/rds_base_query.c @@ -22,6 +22,8 @@ #include "opal/mca/mca.h" #include "opal/mca/base/base.h" + +#include "orte/mca/rds/base/rds_private.h" #include "orte/mca/rds/base/base.h" diff --git a/orte/mca/rds/base/rds_base_receive.c b/orte/mca/rds/base/rds_base_receive.c new file mode 100644 index 0000000000..bd849f0a68 --- /dev/null +++ b/orte/mca/rds/base/rds_base_receive.c @@ -0,0 +1,111 @@ +/* -*- C -*- + * + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +/** @file: + * + */ + +/* + * includes + */ +#include "orte_config.h" + +#include "orte/orte_constants.h" +#include "orte/orte_types.h" + +#include "opal/util/output.h" +#include "opal/mca/mca.h" +#include "opal/mca/base/mca_base_param.h" + +#include "orte/dss/dss.h" +#include "orte/util/proc_info.h" +#include "orte/mca/errmgr/errmgr.h" +#include "orte/mca/rml/rml.h" + +#include "orte/mca/rds/base/rds_private.h" + +static bool recv_issued=false; +/* Post the persistent ORTE_RML_TAG_RDS receive once; returns ORTE_SUCCESS if already posted, else the recv_buffer_nb status. */ +int orte_rds_base_comm_start(void) +{ + int rc; + + if (recv_issued) { + return ORTE_SUCCESS; + } + + if (ORTE_SUCCESS != (rc = orte_rml.recv_buffer_nb(ORTE_RML_NAME_ANY, + ORTE_RML_TAG_RDS, + ORTE_RML_PERSISTENT, + orte_rds_base_recv, + NULL))) { + ORTE_ERROR_LOG(rc); + return rc; /* nothing was posted: leave recv_issued false so a later call retries */ + } + recv_issued = true; + return rc; +} + + + +/* + * handle message from proxies + * NOTE: The incoming buffer "buffer" is OBJ_RELEASED by the calling program. + * DO NOT RELEASE THIS BUFFER IN THIS CODE + */ + +void orte_rds_base_recv(int status, orte_process_name_t* sender, + orte_buffer_t* buffer, orte_rml_tag_t tag, + void* cbdata) +{ + orte_buffer_t answer; + orte_rds_cmd_flag_t command; + orte_std_cntr_t count; + int rc; + + count = 1; + if (ORTE_SUCCESS != (rc = orte_dss.unpack(buffer, &command, &count, ORTE_RDS_CMD))) { + ORTE_ERROR_LOG(rc); + return; + } + + OBJ_CONSTRUCT(&answer, orte_buffer_t); + + switch (command) { + case ORTE_RDS_QUERY_CMD: + if (ORTE_SUCCESS != (rc = orte_dss.pack(&answer, &command, 1, ORTE_RDS_CMD))) { + ORTE_ERROR_LOG(rc); + } + + if (ORTE_SUCCESS != (rc = orte_rds_base_query())) { + ORTE_ERROR_LOG(rc); + } + + if (0 > orte_rml.send_buffer(sender, &answer, tag, 0)) { + ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); + } + break; + + default: + ORTE_ERROR_LOG(ORTE_ERR_VALUE_OUT_OF_BOUNDS); + } + + /* cleanup */ + OBJ_DESTRUCT(&answer); +} + diff --git a/orte/mca/rds/base/rds_base_registry_fns.c b/orte/mca/rds/base/rds_base_registry_fns.c index
aaba2ab4f9..826f972d39 100644 --- a/orte/mca/rds/base/rds_base_registry_fns.c +++ b/orte/mca/rds/base/rds_base_registry_fns.c @@ -33,6 +33,7 @@ #include "orte/mca/gpr/gpr.h" #include "orte/mca/schema/schema.h" +#include "orte/mca/rds/base/rds_private.h" #include "orte/mca/rds/base/base.h" int orte_rds_base_store_resource(opal_list_t *resources) diff --git a/orte/mca/rds/base/rds_base_select.c b/orte/mca/rds/base/rds_base_select.c index 28047cd878..d2c28d0646 100644 --- a/orte/mca/rds/base/rds_base_select.c +++ b/orte/mca/rds/base/rds_base_select.c @@ -41,6 +41,11 @@ int orte_rds_base_select(void) orte_rds_base_component_t *component; orte_rds_base_module_t *module = NULL; + /* if we are using the "null" component, then do nothing */ + if (orte_rds_base.no_op_selected) { + return ORTE_SUCCESS; + } + /* Iterate through all the available components */ for (item = opal_list_get_first(&orte_rds_base.rds_components); @@ -71,8 +76,6 @@ int orte_rds_base_select(void) return ORTE_ERROR; } - orte_rds = *module; - return ORTE_SUCCESS; } diff --git a/orte/mca/rds/base/rds_private.h b/orte/mca/rds/base/rds_private.h new file mode 100644 index 0000000000..5cfa977eab --- /dev/null +++ b/orte/mca/rds/base/rds_private.h @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +/** @file: + */ + +#ifndef ORTE_RDS_PRIVATE_H +#define ORTE_RDS_PRIVATE_H + +/* + * includes + */ +#include "orte_config.h" +#include "orte/orte_constants.h" + +#include "orte/dss/dss_types.h" +#include "orte/mca/ns/ns_types.h" +#include "orte/mca/rml/rml_types.h" + + +#if defined(c_plusplus) || defined(__cplusplus) +extern "C" { +#endif + +/* Define the RDS command flag */ +typedef uint8_t orte_rds_cmd_flag_t; +#define ORTE_RDS_CMD ORTE_UINT8 + +/* define some commands */ +#define ORTE_RDS_QUERY_CMD 0x01 + +/* + * API function definitions + */ +ORTE_DECLSPEC int orte_rds_base_query(void); + +/* + * oob interface + */ +int orte_rds_base_comm_start(void); + +void orte_rds_base_recv(int status, orte_process_name_t* sender, + orte_buffer_t* buffer, orte_rml_tag_t tag, void* cbdata); + +/* + * utility functions for use within the RDS + */ +int orte_rds_base_store_resource(opal_list_t *resource_list); + +/* + * the "null" component functions + */ +int orte_rds_base_no_op_query(void); +int orte_rds_base_no_op_store_resource(opal_list_t *resource_list); + +#if defined(c_plusplus) || defined(__cplusplus) +} +#endif +#endif diff --git a/orte/mca/rds/hostfile/rds_hostfile.c b/orte/mca/rds/hostfile/rds_hostfile.c index 90dbcb9e51..c27154c2d2 100644 --- a/orte/mca/rds/hostfile/rds_hostfile.c +++ b/orte/mca/rds/hostfile/rds_hostfile.c @@ -34,14 +34,15 @@ #include "orte/mca/ns/ns.h" #include "orte/mca/errmgr/errmgr.h" #include "orte/mca/ras/ras.h" -#include "orte/mca/ras/base/ras_base_node.h" +#include "orte/mca/ras/base/ras_private.h" +#include "orte/runtime/runtime_types.h" + #include "orte/mca/rds/rds.h" -#include "orte/mca/rds/base/base.h" +#include "orte/mca/rds/base/rds_private.h" #include "orte/mca/rds/hostfile/rds_hostfile.h" #include "orte/mca/rds/hostfile/rds_hostfile_lex.h" -#include "orte/runtime/runtime_types.h" - +static bool orte_rds_hostfile_queried = false; static orte_cellid_t 
local_cellid; static bool need_cellid = true; static char *cur_hostfile_name = NULL; @@ -311,9 +312,6 @@ static int orte_rds_hostfile_parse(const char *hostfile, opal_list_t* existing, orte_rds_hostfile_done = false; orte_rds_hostfile_in = fopen(hostfile, "r"); if (NULL == orte_rds_hostfile_in) { - opal_show_help("help-rds-hostfile.txt", "rds:no-hostfile", - true, - cur_hostfile_name); rc = ORTE_ERR_NOT_FOUND; goto unlock; } @@ -378,6 +376,19 @@ static int orte_rds_hostfile_query(void) orte_ras_node_t *ras_item; int rc; + if (orte_rds_hostfile_queried) { + /* if we have already been queried, then + * our info is on the registry, so just + * return. Note that this restriction + * may eventually be lifted - ideally, + * we might check to see if this is a + * new file name and go ahead with the + * query if so. + */ + return ORTE_SUCCESS; + } + orte_rds_hostfile_queried = true; + OBJ_CONSTRUCT(&existing, opal_list_t); OBJ_CONSTRUCT(&updates, opal_list_t); OBJ_CONSTRUCT(&rds_updates, opal_list_t); @@ -394,7 +405,9 @@ static int orte_rds_hostfile_query(void) if(mca_rds_hostfile_component.default_hostfile) { rc = ORTE_SUCCESS; } else { - opal_output(0, "orte_rds_hostfile: could not open %s\n", mca_rds_hostfile_component.path); + opal_show_help("help-rds-hostfile.txt", "rds:no-hostfile", + true, + mca_rds_hostfile_component.path); } goto cleanup; } else if (ORTE_SUCCESS != rc) { @@ -513,14 +526,7 @@ cleanup: } -static int orte_rds_hostfile_finalize(void) -{ - return ORTE_SUCCESS; -} - - orte_rds_base_module_t orte_rds_hostfile_module = { orte_rds_hostfile_query, - orte_rds_base_store_resource, - orte_rds_hostfile_finalize + orte_rds_base_store_resource }; diff --git a/orte/mca/rds/hostfile/rds_hostfile_component.c b/orte/mca/rds/hostfile/rds_hostfile_component.c index 3c844e84b7..dd190f5668 100644 --- a/orte/mca/rds/hostfile/rds_hostfile_component.c +++ b/orte/mca/rds/hostfile/rds_hostfile_component.c @@ -17,15 +17,19 @@ */ #include "orte_config.h" +#include 
"orte/orte_constants.h" #include "opal/install_dirs.h" -#include "orte/orte_constants.h" #include "opal/mca/base/base.h" #include "opal/mca/base/mca_base_param.h" -#include "orte/util/proc_info.h" #include "opal/util/output.h" #include "opal/util/os_path.h" + +#include "orte/util/proc_info.h" +#include "orte/mca/errmgr/errmgr.h" + #include "orte/mca/rds/hostfile/rds_hostfile.h" +#include "orte/mca/rds/base/rds_private.h" /* * Local functions @@ -34,7 +38,7 @@ static int orte_rds_hostfile_open(void); static int orte_rds_hostfile_close(void); static orte_rds_base_module_t* orte_rds_hostfile_init(void); - +static int orte_rds_hostfile_finalize(void); orte_rds_hostfile_component_t mca_rds_hostfile_component = { { @@ -42,10 +46,10 @@ orte_rds_hostfile_component_t mca_rds_hostfile_component = { information about the component itself */ { - /* Indicate that we are a iof v1.0.0 component (which also + /* Indicate that we are a rds v1.3.0 component (which also implies a specific MCA version) */ - ORTE_RDS_BASE_VERSION_1_0_0, + ORTE_RDS_BASE_VERSION_1_3_0, "hostfile", /* MCA component name */ ORTE_MAJOR_VERSION, /* MCA component major version */ @@ -61,7 +65,8 @@ orte_rds_hostfile_component_t mca_rds_hostfile_component = { false }, - orte_rds_hostfile_init + orte_rds_hostfile_init, + orte_rds_hostfile_finalize } }; @@ -92,9 +97,27 @@ static int orte_rds_hostfile_open(void) static orte_rds_base_module_t *orte_rds_hostfile_init(void) { + int rc; + + /* if we are NOT an HNP, then don't select us */ + if (!orte_process_info.seed) { + return NULL; + } + + /* issue non-blocking receive for call_back function */ + if (ORTE_SUCCESS != (rc = orte_rds_base_comm_start())) { + ORTE_ERROR_LOG(rc); + return NULL; + } + return &orte_rds_hostfile_module; } +static int orte_rds_hostfile_finalize(void) +{ + return ORTE_SUCCESS; +} + /** * Close all subsystems. 
*/ diff --git a/orte/mca/rds/proxy/Makefile.am b/orte/mca/rds/proxy/Makefile.am new file mode 100644 index 0000000000..f50c1a68e6 --- /dev/null +++ b/orte/mca/rds/proxy/Makefile.am @@ -0,0 +1,51 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# Use the top-level Makefile.options + + + +# Make the output library in this directory, and name it either +# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la +# (for static builds). + +if OMPI_BUILD_rds_proxy_DSO +component_noinst = +component_install = mca_rds_proxy.la +else +component_noinst = libmca_rds_proxy.la +component_install = +endif + +proxy_SOURCES = \ + rds_proxy.c \ + rds_proxy.h \ + rds_proxy_component.c + +mcacomponentdir = $(libdir)/openmpi +mcacomponent_LTLIBRARIES = $(component_install) +mca_rds_proxy_la_SOURCES = $(proxy_SOURCES) +mca_rds_proxy_la_LIBADD = \ + $(top_ompi_builddir)/orte/liborte.la \ + $(top_ompi_builddir)/opal/libopal.la +mca_rds_proxy_la_LDFLAGS = -module -avoid-version + +noinst_LTLIBRARIES = $(component_noinst) +libmca_rds_proxy_la_SOURCES = $(proxy_SOURCES) +libmca_rds_proxy_la_LIBADD = +libmca_rds_proxy_la_LDFLAGS = -module -avoid-version diff --git a/orte/mca/rds/proxy/configure.params b/orte/mca/rds/proxy/configure.params new file mode 100644 index 0000000000..78194de47c --- /dev/null +++ b/orte/mca/rds/proxy/configure.params @@ -0,0 +1,23 @@ +# -*- shell-script -*- +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and
Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# Specific to this module + +PARAM_INIT_FILE=rds_proxy.c +PARAM_CONFIG_FILES="Makefile" diff --git a/orte/mca/rds/proxy/rds_proxy.c b/orte/mca/rds/proxy/rds_proxy.c new file mode 100644 index 0000000000..0ecfaf8f2d --- /dev/null +++ b/orte/mca/rds/proxy/rds_proxy.c @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +/** @file: + * + */ + +#include "orte_config.h" + +#include <string.h> + +#include "orte/orte_constants.h" +#include "orte/orte_types.h" + +#include "orte/dss/dss.h" +#include "orte/mca/errmgr/errmgr.h" +#include "orte/mca/rml/rml.h" + +#include "orte/mca/rds/base/rds_private.h" +#include "rds_proxy.h" + +/** + * globals + */ + +/* + * Send ORTE_RDS_QUERY_CMD to orte_rds_proxy_replica, then receive the reply and check that it echoes the same command. + */ + +int orte_rds_proxy_query(void) +{ + orte_buffer_t* cmd; + orte_buffer_t* answer; + orte_rds_cmd_flag_t command; + orte_std_cntr_t count; + int rc; + + command = ORTE_RDS_QUERY_CMD; + + cmd = OBJ_NEW(orte_buffer_t); + if (cmd == NULL) { + ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); + return ORTE_ERR_OUT_OF_RESOURCE; + } + + if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &command, 1, ORTE_RDS_CMD))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(cmd); + return rc; + } + + if (0 > orte_rml.send_buffer(orte_rds_proxy_replica, cmd, ORTE_RML_TAG_RDS, 0)) { + ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); + OBJ_RELEASE(cmd); + return ORTE_ERR_COMM_FAILURE; + } + OBJ_RELEASE(cmd); + + answer = OBJ_NEW(orte_buffer_t); + if(answer == NULL) { + ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); + return ORTE_ERR_OUT_OF_RESOURCE; + } + + if (0 > orte_rml.recv_buffer(orte_rds_proxy_replica, answer, ORTE_RML_TAG_RDS)) { + ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); + OBJ_RELEASE(answer); + return ORTE_ERR_COMM_FAILURE; + } + + count = 1; + if (ORTE_SUCCESS != (rc = orte_dss.unpack(answer, &command, &count, ORTE_RDS_CMD))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(answer); + return rc; + } + + if (ORTE_RDS_QUERY_CMD != command) { + ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); + OBJ_RELEASE(answer); + return ORTE_ERR_COMM_FAILURE; + } + + OBJ_RELEASE(answer); + return ORTE_SUCCESS; +} diff --git a/orte/mca/rds/proxy/rds_proxy.h b/orte/mca/rds/proxy/rds_proxy.h new file mode 100644 index 0000000000..908cbf1ada --- /dev/null +++ b/orte/mca/rds/proxy/rds_proxy.h @@ -0,0 +1,57 @@ +/* -*- C -*- + * + * Copyright
(c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + * + */ +#ifndef RDS_PROXY_H +#define RDS_PROXY_H + +#include "orte_config.h" + +#include "orte/mca/rds/rds.h" + +#if defined(c_plusplus) || defined(__cplusplus) +extern "C" { +#endif + +/* my replica */ +extern orte_process_name_t *orte_rds_proxy_replica; + +/* + * Module open / close + */ +int orte_rds_proxy_open(void); +int orte_rds_proxy_close(void); + + +/* + * Startup / Shutdown + */ +orte_rds_base_module_t* orte_rds_proxy_init(void); +int orte_rds_proxy_finalize(void); + +/* + * proxy function prototypes + */ +int orte_rds_proxy_query(void); + + +#if defined(c_plusplus) || defined(__cplusplus) +} +#endif + +#endif diff --git a/orte/mca/rds/proxy/rds_proxy_component.c b/orte/mca/rds/proxy/rds_proxy_component.c new file mode 100644 index 0000000000..8ffb4efb07 --- /dev/null +++ b/orte/mca/rds/proxy/rds_proxy_component.c @@ -0,0 +1,118 @@ +/* -*- C -*- + * + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +/** @file: + * + */ + +/* + * includes + */ +#include "orte_config.h" + +#include "orte/orte_constants.h" +#include "orte/util/proc_info.h" +#include "opal/util/output.h" + +#include "opal/mca/mca.h" +#include "opal/mca/base/mca_base_param.h" + +#include "orte/mca/rds/base/rds_private.h" +#include "rds_proxy.h" + +/* + * Struct of function pointers that need to be initialized + */ +orte_rds_base_component_t mca_rds_proxy_component = { + { + ORTE_RDS_BASE_VERSION_1_3_0, + + "proxy", /* MCA module name */ + ORTE_MAJOR_VERSION, /* MCA module major version */ + ORTE_MINOR_VERSION, /* MCA module minor version */ + ORTE_RELEASE_VERSION, /* MCA module release version */ + orte_rds_proxy_open, /* module open */ + orte_rds_proxy_close /* module close */ + }, + { + false /* checkpoint / restart */ + }, + orte_rds_proxy_init, /* module init */ + orte_rds_proxy_finalize /* module shutdown */ +}; + +/* + * setup the function pointers for the module + */ +orte_rds_base_module_t orte_rds_proxy_module = { + orte_rds_proxy_query, + orte_rds_base_store_resource, +}; + +/* + * Whether or not we allowed this component to be selected + */ +static bool initialized = false; + +/* the name of our replica */ +orte_process_name_t *orte_rds_proxy_replica; + +/* + * Not much to do here. 
+ */ +int orte_rds_proxy_open(void) +{ + return ORTE_SUCCESS; +} + +/* + * ditto for this one + */ +int orte_rds_proxy_close(void) +{ + return ORTE_SUCCESS; +} + +orte_rds_base_module_t* orte_rds_proxy_init(void) +{ + /* If we are an HNP, then don't pick us */ + if (orte_process_info.seed) { + return NULL; + } + + /* define the replica for us to use - for now, just point + * to the name service replica + */ + orte_rds_proxy_replica = orte_process_info.ns_replica; + + initialized = true; + return &orte_rds_proxy_module; +} + + +/* + * finalize routine + */ +int orte_rds_proxy_finalize(void) +{ + initialized = false; + + /* All done */ + + return ORTE_SUCCESS; +} diff --git a/orte/mca/rds/rds.h b/orte/mca/rds/rds.h index 4ae215689b..252aea216a 100644 --- a/orte/mca/rds/rds.h +++ b/orte/mca/rds/rds.h @@ -149,57 +149,52 @@ extern "C" { */ typedef int (*orte_rds_base_module_query_fn_t)(void); -/** - * Cleanup module resources. - */ - -typedef int (*orte_rds_base_module_finalize_fn_t)(void); - /** * Add a list of resources to the Resource Segment */ typedef int (*orte_rds_base_module_store_resource_fn_t)(opal_list_t *); /* - * Ver 1.0.0 + * Ver 1.3.0 */ -struct orte_rds_base_module_1_0_0_t { +struct orte_rds_base_module_1_3_0_t { orte_rds_base_module_query_fn_t query; orte_rds_base_module_store_resource_fn_t store_resource; - orte_rds_base_module_finalize_fn_t finalize; }; -typedef struct orte_rds_base_module_1_0_0_t orte_rds_base_module_1_0_0_t; -typedef orte_rds_base_module_1_0_0_t orte_rds_base_module_t; +typedef struct orte_rds_base_module_1_3_0_t orte_rds_base_module_1_3_0_t; +typedef orte_rds_base_module_1_3_0_t orte_rds_base_module_t; /* * RDS Component */ typedef orte_rds_base_module_t* (*orte_rds_base_component_init_fn_t)(void); +typedef int (*orte_rds_base_component_finalize_fn_t)(void); /* * the standard component data structure */ -struct orte_rds_base_component_1_0_0_t { - mca_base_component_t rds_version; - mca_base_component_data_1_0_0_t rds_data; - 
orte_rds_base_component_init_fn_t rds_init; +struct orte_rds_base_component_1_3_0_t { + mca_base_component_t rds_version; + mca_base_component_data_1_0_0_t rds_data; + orte_rds_base_component_init_fn_t rds_init; + orte_rds_base_component_finalize_fn_t rds_fini; }; -typedef struct orte_rds_base_component_1_0_0_t orte_rds_base_component_1_0_0_t; -typedef orte_rds_base_component_1_0_0_t orte_rds_base_component_t; +typedef struct orte_rds_base_component_1_3_0_t orte_rds_base_component_1_3_0_t; +typedef orte_rds_base_component_1_3_0_t orte_rds_base_component_t; /* - * Macro for use in components that are of type rda v1.0.0 + * Macro for use in components that are of type rds v1.3.0 */ -#define ORTE_RDS_BASE_VERSION_1_0_0 \ - /* rds v1.0 is chained to MCA v1.0 */ \ +#define ORTE_RDS_BASE_VERSION_1_3_0 \ + /* rds v1.3 is chained to MCA v1.0 */ \ MCA_BASE_VERSION_1_0_0, \ - /* rds v1.0 */ \ - "rds", 1, 0, 0 + /* rds v1.3 */ \ + "rds", 1, 3, 0 /* * global module that holds function pointers diff --git a/orte/mca/rds/resfile/rds_resfile.c b/orte/mca/rds/resfile/rds_resfile.c index 363a9f16e3..356e58ed70 100644 --- a/orte/mca/rds/resfile/rds_resfile.c +++ b/orte/mca/rds/resfile/rds_resfile.c @@ -24,6 +24,7 @@ #include "orte/mca/errmgr/errmgr.h" #include "orte/mca/ns/ns.h" +#include "orte/mca/rds/base/rds_private.h" #include "orte/mca/rds/resfile/rds_resfile.h" #define ORTE_RDS_RESFILE_MAX_LINE_LENGTH 512 @@ -228,12 +229,15 @@ int orte_rds_resfile_query(void) FILE *fp; char *input_line, *site; + if (orte_rds_resfile_queried) { + /* if we have previously been queried, then our info + * is already on the registry, so just return + */ + return ORTE_SUCCESS; + } + OPAL_LOCK(&mca_rds_resfile_component.lock); - if (orte_rds_resfile_queried) { - OPAL_UNLOCK(&mca_rds_resfile_component.lock); - return ORTE_SUCCESS; - } orte_rds_resfile_queried = true; /* get the resource filename */ @@ -301,12 +305,6 @@ CLEANUP: } -int orte_rds_resfile_finalize(void) -{ - return ORTE_SUCCESS; -} - - 
char *orte_rds_resfile_getline(FILE *fp) { int i; diff --git a/orte/mca/rds/resfile/rds_resfile_component.c b/orte/mca/rds/resfile/rds_resfile_component.c index 79534f01e3..ecec4ffb71 100644 --- a/orte/mca/rds/resfile/rds_resfile_component.c +++ b/orte/mca/rds/resfile/rds_resfile_component.c @@ -18,10 +18,16 @@ #include "orte_config.h" #include "orte/orte_constants.h" + #include "opal/mca/base/base.h" #include "opal/mca/base/mca_base_param.h" -#include "orte/util/proc_info.h" #include "opal/util/output.h" + +#include "orte/util/proc_info.h" +#include "orte/mca/errmgr/errmgr.h" +#include "orte/mca/ns/ns_types.h" + +#include "orte/mca/rds/base/rds_private.h" #include "orte/mca/rds/resfile/rds_resfile.h" /* @@ -32,7 +38,6 @@ static int orte_rds_resfile_open(void); static int orte_rds_resfile_close(void); static orte_rds_base_module_t* orte_rds_resfile_init(void); - orte_rds_resfile_component_t mca_rds_resfile_component = { { /* First, the mca_base_component_t struct containing meta @@ -42,7 +47,7 @@ orte_rds_resfile_component_t mca_rds_resfile_component = { /* Indicate that we are a iof v1.0.0 component (which also implies a specific MCA version) */ - ORTE_RDS_BASE_VERSION_1_0_0, + ORTE_RDS_BASE_VERSION_1_3_0, "resfile", /* MCA component name */ ORTE_MAJOR_VERSION, /* MCA component major version */ @@ -58,7 +63,8 @@ orte_rds_resfile_component_t mca_rds_resfile_component = { false }, - orte_rds_resfile_init + orte_rds_resfile_init, + orte_rds_resfile_finalize } }; @@ -66,7 +72,6 @@ orte_rds_resfile_component_t mca_rds_resfile_component = { orte_rds_base_module_t orte_rds_resfile_module = { orte_rds_resfile_query, orte_rds_base_store_resource, - orte_rds_resfile_finalize }; /* @@ -98,10 +103,30 @@ static int orte_rds_resfile_open(void) static orte_rds_base_module_t *orte_rds_resfile_init(void) { + int rc; + + /* if we are not an HNP, then don't select us */ + if (!orte_process_info.seed) { + return NULL; + } + + /* if we are an HNP, then volunteer */ 
OBJ_DESTRUCT(&mca_rds_resfile_component.lock); + + /* issue non-blocking receive for call_back function */ + if (ORTE_SUCCESS != (rc = orte_rds_base_comm_start())) { + ORTE_ERROR_LOG(rc); + return NULL; + } + return &orte_rds_resfile_module; } +int orte_rds_resfile_finalize(void) +{ + return ORTE_SUCCESS; +} + /** * Close all subsystems. */ diff --git a/orte/mca/rmaps/base/Makefile.am b/orte/mca/rmaps/base/Makefile.am index c542590b70..6fc3ed40fd 100644 --- a/orte/mca/rmaps/base/Makefile.am +++ b/orte/mca/rmaps/base/Makefile.am @@ -20,13 +20,14 @@ dist_pkgdata_DATA = base/help-orte-rmaps-base.txt headers += \ base/base.h \ - base/rmaps_base_node.h \ - base/rmaps_base_map.h + base/rmaps_private.h libmca_rmaps_la_SOURCES += \ base/rmaps_base_close.c \ - base/rmaps_base_map.h \ base/rmaps_base_map.c \ + base/rmaps_base_map_job.c \ base/rmaps_base_node.c \ + base/rmaps_base_no_ops.c \ base/rmaps_base_open.c \ - base/rmaps_base_select.c + base/rmaps_base_receive.c \ + base/rmaps_base_find_avail.c diff --git a/orte/mca/rmaps/base/base.h b/orte/mca/rmaps/base/base.h index 48f8b466ce..961e6592fb 100644 --- a/orte/mca/rmaps/base/base.h +++ b/orte/mca/rmaps/base/base.h @@ -49,6 +49,8 @@ extern "C" { typedef struct orte_rmaps_base_t { /** Verbose/debug output stream */ int rmaps_output; + /** Whether or not the NO_OP module is in use */ + bool no_op_selected; /** List of opened components */ opal_list_t rmaps_opened; /** Sorted list of available components (highest priority first) */ @@ -62,25 +64,6 @@ extern "C" { */ ORTE_DECLSPEC extern orte_rmaps_base_t orte_rmaps_base; - /** - * RMAPS component/module/priority tuple - */ - struct orte_rmaps_base_cmp_t { - /** Base object */ - opal_list_item_t super; - /** rmaps component */ - orte_rmaps_base_component_t *component; - /** rmaps module */ - orte_rmaps_base_module_t* module; - /** This component's priority */ - int priority; - }; - /** Convenience typedef */ - typedef struct orte_rmaps_base_cmp_t orte_rmaps_base_cmp_t; - /** 
Class declaration */ - ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_rmaps_base_cmp_t); - - /** * Open the rmaps framework */ @@ -89,7 +72,7 @@ extern "C" { /** * Select an rmaps component / module */ - ORTE_DECLSPEC orte_rmaps_base_module_t *orte_rmaps_base_select(char *preferred); + ORTE_DECLSPEC int orte_rmaps_base_find_available(void); /** * Utility routines to get/set vpid mapping for the job diff --git a/orte/mca/rmaps/base/rmaps_base_close.c b/orte/mca/rmaps/base/rmaps_base_close.c index 2ce13b7b34..e9ff5749e6 100644 --- a/orte/mca/rmaps/base/rmaps_base_close.c +++ b/orte/mca/rmaps/base/rmaps_base_close.c @@ -25,6 +25,7 @@ #include "opal/mca/mca.h" #include "opal/mca/base/base.h" +#include "orte/mca/rmaps/base/rmaps_private.h" #include "orte/mca/rmaps/base/base.h" @@ -32,28 +33,33 @@ int orte_rmaps_base_finalize(void) { opal_list_item_t* item; - /* Finalize all available modules */ - - while (NULL != - (item = opal_list_remove_first(&orte_rmaps_base.rmaps_available))) { - orte_rmaps_base_cmp_t* cmp = (orte_rmaps_base_cmp_t*) item; - opal_output(orte_rmaps_base.rmaps_output, - "orte:base:close: finalizing module %s", - cmp->component->rmaps_version.mca_component_name); - if (NULL != cmp->module->finalize) { - cmp->module->finalize(); + if (!orte_rmaps_base.no_op_selected) { + /* Finalize all available modules */ + + while (NULL != + (item = opal_list_remove_first(&orte_rmaps_base.rmaps_available))) { + orte_rmaps_base_cmp_t* cmp = (orte_rmaps_base_cmp_t*) item; + opal_output(orte_rmaps_base.rmaps_output, + "orte:rmaps:base:close: finalizing module %s", + cmp->component->rmaps_version.mca_component_name); + if (NULL != cmp->module->finalize) { + cmp->module->finalize(); + } + OBJ_RELEASE(cmp); } - OBJ_RELEASE(cmp); } + return ORTE_SUCCESS; } int orte_rmaps_base_close(void) { - /* Close all remaining open components */ + if (!orte_rmaps_base.no_op_selected) { + /* Close all remaining open components */ - mca_base_components_close(orte_rmaps_base.rmaps_output, - 
&orte_rmaps_base.rmaps_opened, NULL); + mca_base_components_close(orte_rmaps_base.rmaps_output, + &orte_rmaps_base.rmaps_opened, NULL); + } return ORTE_SUCCESS; } diff --git a/orte/mca/rmaps/base/rmaps_base_find_avail.c b/orte/mca/rmaps/base/rmaps_base_find_avail.c new file mode 100644 index 0000000000..c20a4375a5 --- /dev/null +++ b/orte/mca/rmaps/base/rmaps_base_find_avail.c @@ -0,0 +1,129 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "orte_config.h" + +#include + +#include "orte/orte_constants.h" +#include "opal/util/output.h" +#include "opal/mca/mca.h" +#include "opal/mca/base/base.h" + +#include "orte/mca/rmaps/base/rmaps_private.h" +#include "orte/mca/rmaps/base/base.h" + + +/* + * Local functions + */ +static void cmp_constructor(orte_rmaps_base_cmp_t *cmp); +static void cmp_destructor(orte_rmaps_base_cmp_t *cmp); +OBJ_CLASS_INSTANCE(orte_rmaps_base_cmp_t, opal_list_item_t, + cmp_constructor, cmp_destructor); + +static int compare(opal_list_item_t **a, opal_list_item_t **b); + + +/* + * Function for selecting one component from all those that are + * available. 
+ */ +int orte_rmaps_base_find_available(void) +{ + opal_list_item_t *item; + mca_base_component_list_item_t *cli; + orte_rmaps_base_component_t *component; + orte_rmaps_base_module_t *module; + orte_rmaps_base_cmp_t *cmp; + int priority; + + /* construct the list to hold any available components */ + OBJ_CONSTRUCT(&orte_rmaps_base.rmaps_available, opal_list_t); + + /* Query all the opened components and see if they want to run */ + for (item = opal_list_get_first(&orte_rmaps_base.rmaps_opened); + opal_list_get_end(&orte_rmaps_base.rmaps_opened) != item; + item = opal_list_get_next(item)) { + cli = (mca_base_component_list_item_t *) item; + component = (orte_rmaps_base_component_t *) cli->cli_component; + opal_output(orte_rmaps_base.rmaps_output, + "orte:base:open: querying component %s", + component->rmaps_version.mca_component_name); + + /* Call the component's init function and see if it wants to be + selected */ + + module = component->rmaps_init(&priority); + + /* If we got a non-NULL module back, then the component wants + to be considered for selection */ + + if (NULL != module) { + opal_output(orte_rmaps_base.rmaps_output, + "orte:base:open: component %s returns priority %d", + component->rmaps_version.mca_component_name, + priority); + + cmp = OBJ_NEW(orte_rmaps_base_cmp_t); + cmp->component = component; + cmp->module = module; + cmp->priority = priority; + + opal_list_append(&orte_rmaps_base.rmaps_available, &cmp->super); + } else { + opal_output(orte_rmaps_base.rmaps_output, + "orte:base:open: component %s does NOT want to be considered for selection", + component->rmaps_version.mca_component_name); + } + } + + /* Sort the resulting available list in priority order */ + opal_list_sort(&orte_rmaps_base.rmaps_available, compare); + + /* all done */ + return ORTE_SUCCESS; +} + +static void cmp_constructor(orte_rmaps_base_cmp_t *cmp) +{ + cmp->component = NULL; + cmp->module = NULL; + cmp->priority = -1; +} + + +static void 
cmp_destructor(orte_rmaps_base_cmp_t *cmp) +{ + cmp_constructor(cmp); +} + +/* Comparator handed to opal_list_sort(): orders two component entries by DESCENDING priority. */ +static int compare(opal_list_item_t **a, opal_list_item_t **b) +{ + orte_rmaps_base_cmp_t *aa = *((orte_rmaps_base_cmp_t **) a); + orte_rmaps_base_cmp_t *bb = *((orte_rmaps_base_cmp_t **) b); + /* a negative result puts aa ahead of bb; the head of rmaps_available must be the highest-priority component */ + if (aa->priority > bb->priority) { + return -1; + } else if (aa->priority == bb->priority) { + return 0; + } else { + return 1; + } +} diff --git a/orte/mca/rmaps/base/rmaps_base_map.c b/orte/mca/rmaps/base/rmaps_base_map.c index f90d02b49e..b4620ba424 100644 --- a/orte/mca/rmaps/base/rmaps_base_map.c +++ b/orte/mca/rmaps/base/rmaps_base_map.c @@ -29,11 +29,12 @@ #include "orte/mca/gpr/gpr.h" #include "orte/mca/ns/ns.h" #include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/rmgr/base/base.h" -#include "orte/mca/rmaps/base/base.h" -#include "orte/mca/rmaps/base/rmaps_base_map.h" +#include "orte/mca/ras/ras.h" +#include "orte/mca/rmgr/rmgr.h" #include "orte/mca/smr/smr_types.h" +#include "orte/mca/rmaps/base/rmaps_private.h" +#include "orte/mca/rmaps/base/base.h" /** * orte_rmaps_base_node_t @@ -324,14 +325,14 @@ int orte_rmaps_base_get_map(orte_jobid_t jobid, opal_list_t* mapping_list) }; /* query the application context */ - if(ORTE_SUCCESS != (rc = orte_rmgr_base_get_app_context(jobid, &app_context, &num_context))) { + if(ORTE_SUCCESS != (rc = orte_rmgr.get_app_context(jobid, &app_context, &num_context))) { ORTE_ERROR_LOG(rc); return rc; } /* query the node list */ OBJ_CONSTRUCT(&nodes, opal_list_t); - if(ORTE_SUCCESS != (rc = orte_ras_base_node_query_alloc(&nodes,jobid))) { + if(ORTE_SUCCESS != (rc = orte_ras.node_query_alloc(&nodes,jobid))) { ORTE_ERROR_LOG(rc); goto cleanup; } @@ -553,13 +554,13 @@ int orte_rmaps_base_get_node_map( }; /* allocate the node */ - if(NULL == (ras_node = orte_ras_base_node_lookup(cellid,hostname))) { + if(NULL == (ras_node = orte_ras.node_lookup(cellid,hostname))) { ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); return ORTE_ERR_NOT_FOUND; } /* query the application context 
*/ - if(ORTE_SUCCESS != (rc = orte_rmgr_base_get_app_context(jobid, &app_context, &num_context))) { + if(ORTE_SUCCESS != (rc = orte_rmgr.get_app_context(jobid, &app_context, &num_context))) { ORTE_ERROR_LOG(rc); return rc; } @@ -900,119 +901,3 @@ cleanup: return rc; } - -/* - * Set the vpid start and range on the "global" job segment. - */ - -int orte_rmaps_base_set_vpid_range(orte_jobid_t jobid, orte_vpid_t start, orte_vpid_t range) -{ - orte_gpr_value_t *value; - char *segment; - int rc; - - if(ORTE_SUCCESS != (rc = orte_schema.get_job_segment_name(&segment, jobid))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - if (ORTE_SUCCESS != (rc = orte_gpr.create_value(&value, ORTE_GPR_OVERWRITE, segment, 2, 1))) { - ORTE_ERROR_LOG(rc); - free(segment); - return rc; - } - free(segment); - value->tokens[0] = strdup(ORTE_JOB_GLOBALS); - - if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[0]), ORTE_JOB_VPID_START_KEY, ORTE_VPID, &start))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(value); - return rc; - } - if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[1]), ORTE_JOB_VPID_RANGE_KEY, ORTE_VPID, &range))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(value); - return rc; - } - - rc = orte_gpr.put(1, &value); - if (ORTE_SUCCESS != rc) ORTE_ERROR_LOG(rc); - - OBJ_RELEASE(value); - return rc; -} - - -/* - * Get the vpid start and range from the "global" job segment. 
- */ - -int orte_rmaps_base_get_vpid_range(orte_jobid_t jobid, orte_vpid_t *start, orte_vpid_t *range) -{ - char *segment; - char *tokens[2]; - char *keys[3]; - orte_gpr_value_t** values = NULL; - orte_std_cntr_t i, num_values = 0; - orte_vpid_t *vptr; - int rc; - - /* query the job segment on the registry */ - if(ORTE_SUCCESS != (rc = orte_schema.get_job_segment_name(&segment, jobid))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - tokens[0] = ORTE_JOB_GLOBALS; - tokens[1] = NULL; - - keys[0] = ORTE_JOB_VPID_START_KEY; - keys[1] = ORTE_JOB_VPID_RANGE_KEY; - keys[2] = NULL; - - rc = orte_gpr.get( - ORTE_GPR_KEYS_AND|ORTE_GPR_TOKENS_OR, - segment, - tokens, - keys, - &num_values, - &values - ); - if(rc != ORTE_SUCCESS) { - free(segment); - ORTE_ERROR_LOG(rc); - return rc; - } - if(num_values != 1) { - rc = ORTE_ERR_NOT_FOUND; - ORTE_ERROR_LOG(rc); - goto cleanup; - } - - for(i=0; icnt; i++) { - if(strcmp(values[0]->keyvals[i]->key, ORTE_JOB_VPID_START_KEY) == 0) { - if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&vptr, values[0]->keyvals[i]->value, ORTE_VPID))) { - ORTE_ERROR_LOG(rc); - goto cleanup; - } - *start = *vptr; - continue; - } - if(strcmp(values[0]->keyvals[i]->key, ORTE_JOB_VPID_RANGE_KEY) == 0) { - if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&vptr, values[0]->keyvals[i]->value, ORTE_VPID))) { - ORTE_ERROR_LOG(rc); - goto cleanup; - } - *range = *vptr; - continue; - } - } - -cleanup: - for(i=0; i -#endif - -#include "opal/class/opal_list.h" -#include "opal/mca/mca.h" -#include "orte/mca/ns/ns_types.h" -#include "orte/mca/ras/ras_types.h" -#include "orte/mca/ras/base/ras_base_node.h" -#include "orte/mca/rmaps/rmaps.h" - - -/* - * Global functions for MCA overall collective open and close - */ -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif - -/** - * RMAPS - */ - -/* - * Mapping of nodes to process ranks. 
- */ - -struct orte_rmaps_base_node_t { - opal_list_item_t super; - orte_ras_node_t* node; - opal_list_t node_procs; -}; -typedef struct orte_rmaps_base_node_t orte_rmaps_base_node_t; - -ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_rmaps_base_node_t); - - -/* - * Mapping of a process rank to a specific node. - */ - -struct orte_rmaps_base_proc_t { - opal_list_item_t super; - char *app; /* name of executable */ - orte_rmaps_base_node_t* proc_node; - orte_process_name_t proc_name; - orte_std_cntr_t proc_rank; - pid_t pid; /* PLS-assigned pid */ - pid_t local_pid; /* pid found by local process */ -}; -typedef struct orte_rmaps_base_proc_t orte_rmaps_base_proc_t; - -ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_rmaps_base_proc_t); - - -/* - * Structure that represents the mapping of an application to an - * allocated set of resources. - */ - -struct orte_rmaps_base_map_t { - opal_list_item_t super; - orte_app_context_t *app; - orte_rmaps_base_proc_t** procs; - orte_std_cntr_t num_procs; - opal_list_t nodes; -}; -typedef struct orte_rmaps_base_map_t orte_rmaps_base_map_t; - -ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_rmaps_base_map_t); - -ORTE_DECLSPEC int orte_rmaps_base_mapped_node_query(opal_list_t* mapping_list, opal_list_t* nodes_alloc, orte_jobid_t jobid); -ORTE_DECLSPEC int orte_rmaps_base_get_map(orte_jobid_t, opal_list_t* mapping); -ORTE_DECLSPEC int orte_rmaps_base_set_map(orte_jobid_t, opal_list_t* mapping); -ORTE_DECLSPEC int orte_rmaps_base_get_node_map(orte_cellid_t, orte_jobid_t, const char*, opal_list_t* mapping); - - -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif -#endif diff --git a/orte/mca/rmaps/base/rmaps_base_select.c b/orte/mca/rmaps/base/rmaps_base_map_job.c similarity index 65% rename from orte/mca/rmaps/base/rmaps_base_select.c rename to orte/mca/rmaps/base/rmaps_base_map_job.c index da861d037d..1a4812c475 100644 --- a/orte/mca/rmaps/base/rmaps_base_select.c +++ b/orte/mca/rmaps/base/rmaps_base_map_job.c @@ -2,7 +2,7 @@ * Copyright (c) 
2004-2005 The Trustees of Indiana University and Indiana * University Research and Technology * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University + * Copyright (c) 2004-2005 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, @@ -24,6 +24,10 @@ #include "opal/util/output.h" #include "opal/mca/mca.h" #include "opal/mca/base/base.h" + +#include "orte/mca/errmgr/errmgr.h" + +#include "orte/mca/rmaps/base/rmaps_private.h" #include "orte/mca/rmaps/base/base.h" @@ -38,13 +42,31 @@ static orte_rmaps_base_module_t *select_any(void); * Function for selecting one component from all those that are * available. */ -orte_rmaps_base_module_t* orte_rmaps_base_select(char *preferred) +int orte_rmaps_base_map(orte_jobid_t job, char *desired_mapper) { - if (NULL != preferred) { - return select_preferred(preferred); + orte_rmaps_base_module_t *module=NULL; + int rc; + + if (NULL != desired_mapper) { + module = select_preferred(desired_mapper); } else { - return select_any(); + module = select_any(); } + + /* check for error - desired_mapper is NULL here when select_any() came up empty, so it must not reach %s unguarded */ + if (NULL == module) { + ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); + opal_output(orte_rmaps_base.rmaps_output, + "orte:rmaps:base:map: could not find desired mapper component %s", (NULL == desired_mapper) ? "(none requested)" : desired_mapper); + return ORTE_ERR_NOT_FOUND; + } + + if (ORTE_SUCCESS != (rc = module->map_job(job, desired_mapper))) { + ORTE_ERROR_LOG(rc); + return rc; + } + + return ORTE_SUCCESS; } @@ -56,7 +78,7 @@ static orte_rmaps_base_module_t *select_preferred(char *name) /* Look for a matching selected name */ opal_output(orte_rmaps_base.rmaps_output, - "orte:base:select: looking for component %s", name); + "orte:rmaps:base:map: looking for component %s", name); for (item = opal_list_get_first(&orte_rmaps_base.rmaps_available); item != opal_list_get_end(&orte_rmaps_base.rmaps_available); item = 
opal_list_get_next(item)) { @@ -65,7 +87,7 @@ static orte_rmaps_base_module_t *select_preferred(char *name) if (0 == strcmp(name, cmp->component->rmaps_version.mca_component_name)) { opal_output(orte_rmaps_base.rmaps_output, - "orte:base:select: found module for compoent %s", name); + "orte:rmaps:base:map: found module for component %s", name); return cmp->module; } } @@ -73,7 +95,7 @@ static orte_rmaps_base_module_t *select_preferred(char *name) /* Didn't find a matching name */ opal_output(orte_rmaps_base.rmaps_output, - "orte:base:select: did not find module for compoent %s", name); + "orte:rmaps:base:map: did not find module for compoent %s", name); return NULL; } @@ -85,9 +107,9 @@ static orte_rmaps_base_module_t *select_any(void) /* If the list is empty, return NULL */ - if (true == opal_list_is_empty(&orte_rmaps_base.rmaps_available)) { + if (opal_list_is_empty(&orte_rmaps_base.rmaps_available) > 0) { opal_output(orte_rmaps_base.rmaps_output, - "orte:base:select: no components available!"); + "orte:rmaps:base:map: no components available!"); return NULL; } @@ -97,7 +119,7 @@ static orte_rmaps_base_module_t *select_any(void) item = opal_list_get_first(&orte_rmaps_base.rmaps_available); cmp = (orte_rmaps_base_cmp_t *) item; opal_output(orte_rmaps_base.rmaps_output, - "orte:base:select: highest priority component: %s", + "orte:rmaps:base:map: highest priority component: %s", cmp->component->rmaps_version.mca_component_name); return cmp->module; } diff --git a/orte/mca/rmaps/base/rmaps_base_no_ops.c b/orte/mca/rmaps/base/rmaps_base_no_ops.c new file mode 100644 index 0000000000..a5cccff52b --- /dev/null +++ b/orte/mca/rmaps/base/rmaps_base_no_ops.c @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. 
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "orte_config.h" + +#include "orte/orte_constants.h" +#include "orte/orte_types.h" + +#include "opal/class/opal_list.h" + +#include "orte/mca/ns/ns_types.h" + +#include "orte/mca/rmaps/base/rmaps_private.h" + + +int orte_rmaps_base_map_no_op(orte_jobid_t job, char *desired_mapper) +{ + return ORTE_ERR_NOT_SUPPORTED; +} + diff --git a/orte/mca/rmaps/base/rmaps_base_node.c b/orte/mca/rmaps/base/rmaps_base_node.c index 1a752e4866..a18329606b 100644 --- a/orte/mca/rmaps/base/rmaps_base_node.c +++ b/orte/mca/rmaps/base/rmaps_base_node.c @@ -32,14 +32,12 @@ #include "orte/util/sys_info.h" #include "orte/mca/errmgr/errmgr.h" #include "orte/mca/smr/smr_types.h" -#include "orte/mca/ras/base/ras_base_node.h" +#include "orte/mca/ras/ras.h" #include "orte/mca/gpr/gpr.h" #include "orte/mca/ns/ns.h" +#include "orte/mca/rmaps/base/rmaps_private.h" #include "orte/mca/rmaps/base/base.h" -#include "orte/mca/rmaps/base/rmaps_base_map.h" -#include "orte/mca/rmaps/base/rmaps_base_node.h" - /* * A sanity check to ensure that all of the requested nodes are actually @@ -106,7 +104,7 @@ int orte_rmaps_base_get_target_nodes(opal_list_t* nodes, orte_jobid_t jobid, ort /** set default answer */ *total_num_slots = 0; - if(ORTE_SUCCESS != (rc = orte_ras_base_node_query_alloc(nodes, jobid))) { + if(ORTE_SUCCESS != (rc = orte_ras.node_query_alloc(nodes, jobid))) { ORTE_ERROR_LOG(rc); return rc; } diff --git a/orte/mca/rmaps/base/rmaps_base_node.h b/orte/mca/rmaps/base/rmaps_base_node.h deleted file mode 100644 index 9083599767..0000000000 --- a/orte/mca/rmaps/base/rmaps_base_node.h +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - 
* University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * RMAPS framework base functionality. - */ - -#ifndef ORTE_RMAPS_BASE_NODE_H -#define ORTE_RMAPS_BASE_NODE_H - -/* - * includes - */ -#include "orte_config.h" -#include "orte/orte_constants.h" - -#ifdef HAVE_SYS_TYPES_H -#include -#endif - -#include "opal/class/opal_list.h" -#include "orte/mca/ns/ns_types.h" -#include "orte/mca/rmaps/rmaps.h" - - -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif - -/** - * RMAPS - */ - -ORTE_DECLSPEC int orte_rmaps_base_get_target_nodes(opal_list_t* node_list, orte_jobid_t jobid, orte_std_cntr_t *total_num_slots); -ORTE_DECLSPEC int orte_rmaps_base_update_node_usage(opal_list_t *nodes); -ORTE_DECLSPEC int orte_rmaps_base_get_mapped_targets(opal_list_t *mapped_node_list, - orte_app_context_t *app, - opal_list_t *master_node_list, - orte_std_cntr_t *total_num_slots); -ORTE_DECLSPEC int orte_rmaps_base_claim_slot(orte_rmaps_base_map_t *map, - orte_ras_node_t *current_node, - orte_jobid_t jobid, orte_vpid_t vpid, - int proc_index, - opal_list_t *nodes, - opal_list_t *fully_used_nodes); - -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif -#endif diff --git a/orte/mca/rmaps/base/rmaps_base_open.c b/orte/mca/rmaps/base/rmaps_base_open.c index 4eed4d3f1b..3ac5799f53 100644 --- a/orte/mca/rmaps/base/rmaps_base_open.c +++ b/orte/mca/rmaps/base/rmaps_base_open.c @@ -25,6 +25,7 @@ #include "opal/mca/base/mca_base_param.h" #include "opal/util/output.h" +#include 
"orte/mca/rmaps/base/rmaps_private.h" #include "orte/mca/rmaps/base/base.h" @@ -37,20 +38,23 @@ #include "orte/mca/rmaps/base/static-components.h" -/* - * Local functions - */ -static void cmp_constructor(orte_rmaps_base_cmp_t *cmp); -static void cmp_destructor(orte_rmaps_base_cmp_t *cmp); -static int compare(opal_list_item_t **a, opal_list_item_t **b); - /* * Global variables */ orte_rmaps_base_t orte_rmaps_base; -OBJ_CLASS_INSTANCE(orte_rmaps_base_cmp_t, opal_list_item_t, - cmp_constructor, cmp_destructor); +/* + * Declare the RMAPS module to hold the API function pointers + */ +orte_rmaps_base_module_t orte_rmaps = { + orte_rmaps_base_map, + orte_rmaps_base_finalize +}; + +orte_rmaps_base_module_t orte_rmaps_no_op = { + orte_rmaps_base_map_no_op, + orte_rmaps_base_finalize +}; /** * Function for finding and opening either all MCA components, or the one @@ -58,13 +62,8 @@ OBJ_CLASS_INSTANCE(orte_rmaps_base_cmp_t, opal_list_item_t, */ int orte_rmaps_base_open(void) { - opal_list_item_t *item; - mca_base_component_list_item_t *cli; - orte_rmaps_base_component_t *component; - orte_rmaps_base_module_t *module; - int param, priority, value; - orte_rmaps_base_cmp_t *cmp; - char *policy; + int param, value; + char *policy, *requested; /* Debugging / verbose output */ @@ -104,6 +103,28 @@ int orte_rmaps_base_open(void) } + /* Some systems do not want any RMAPS support. In those cases, + * memory consumption is also an issue. For those systems, we + * avoid opening the RMAPS components by checking for a directive + * to use the "null" component. + */ + param = mca_base_param_reg_string_name("rmaps", NULL, NULL, + false, false, NULL, NULL); + if (ORTE_ERROR == mca_base_param_lookup_string(param, &requested)) { + return ORTE_ERROR; + } + if (NULL != requested && 0 == strcmp(requested, "null")) { + /* the user has specifically requested that we use the "null" + * component. 
In this case, that means we do NOT open any + * components, and we simply use the default module we have + * already defined above + */ + orte_rmaps_base.no_op_selected = true; + orte_rmaps = orte_rmaps_no_op; /* use the no_op module */ + return ORTE_SUCCESS; + } + orte_rmaps_base.no_op_selected = false; + /* Open up all the components that we can find */ if (ORTE_SUCCESS != @@ -113,79 +134,7 @@ int orte_rmaps_base_open(void) return ORTE_ERROR; } - /* Query all the opened components and see if they want to run */ - - OBJ_CONSTRUCT(&orte_rmaps_base.rmaps_available, opal_list_t); - for (item = opal_list_get_first(&orte_rmaps_base.rmaps_opened); - opal_list_get_end(&orte_rmaps_base.rmaps_opened) != item; - item = opal_list_get_next(item)) { - cli = (mca_base_component_list_item_t *) item; - component = (orte_rmaps_base_component_t *) cli->cli_component; - opal_output(orte_rmaps_base.rmaps_output, - "orte:base:open: querying component %s", - component->rmaps_version.mca_component_name); - - /* Call the component's init function and see if it wants to be - selected */ - - module = component->rmaps_init(&priority); - - /* If we got a non-NULL module back, then the component wants - to be considered for selection */ - - if (NULL != module) { - opal_output(orte_rmaps_base.rmaps_output, - "orte:base:open: component %s returns priority %d", - component->rmaps_version.mca_component_name, - priority); - - cmp = OBJ_NEW(orte_rmaps_base_cmp_t); - cmp->component = component; - cmp->module = module; - cmp->priority = priority; - - opal_list_append(&orte_rmaps_base.rmaps_available, &cmp->super); - } else { - opal_output(orte_rmaps_base.rmaps_output, - "orte:base:open: component %s does NOT want to be considered for selection", - component->rmaps_version.mca_component_name); - } - } - - /* Sort the resulting available list in priority order */ - - opal_list_sort(&orte_rmaps_base.rmaps_available, compare); - /* All done */ return ORTE_SUCCESS; } - - -static void 
cmp_constructor(orte_rmaps_base_cmp_t *cmp) -{ - cmp->component = NULL; - cmp->module = NULL; - cmp->priority = -1; -} - - -static void cmp_destructor(orte_rmaps_base_cmp_t *cmp) -{ - cmp_constructor(cmp); -} - - -static int compare(opal_list_item_t **a, opal_list_item_t **b) -{ - orte_rmaps_base_cmp_t *aa = *((orte_rmaps_base_cmp_t **) a); - orte_rmaps_base_cmp_t *bb = *((orte_rmaps_base_cmp_t **) b); - - if (aa->priority > bb->priority) { - return 1; - } else if (aa->priority == bb->priority) { - return 0; - } else { - return -1; - } -} diff --git a/orte/mca/rmaps/base/rmaps_base_receive.c b/orte/mca/rmaps/base/rmaps_base_receive.c new file mode 100644 index 0000000000..14433aaaaf --- /dev/null +++ b/orte/mca/rmaps/base/rmaps_base_receive.c @@ -0,0 +1,154 @@ +/* -*- C -*- + * + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +/** @file: + * + */ + +/* + * includes + */ +#include "orte_config.h" + +#include "orte/orte_constants.h" +#include "orte/orte_types.h" + +#include "opal/util/output.h" +#include "opal/mca/mca.h" +#include "opal/mca/base/mca_base_param.h" + +#include "orte/dss/dss.h" +#include "orte/util/proc_info.h" +#include "orte/mca/errmgr/errmgr.h" +#include "orte/mca/rmaps/rmaps.h" +#include "orte/mca/rml/rml.h" + +#include "orte/mca/rmaps/base/rmaps_private.h" + +static bool recv_issued=false; +/* Post the persistent non-blocking receive for RMAPS commands; a no-op once one has been posted. Returns the RML status. */ +int orte_rmaps_base_comm_start(void) +{ + int rc; + + if (recv_issued) { + return ORTE_SUCCESS; + } + + if (ORTE_SUCCESS != (rc = orte_rml.recv_buffer_nb(ORTE_RML_NAME_ANY, + ORTE_RML_TAG_RMAPS, + ORTE_RML_PERSISTENT, + orte_rmaps_base_recv, + NULL))) { + ORTE_ERROR_LOG(rc); + } else { + recv_issued = true; /* only remember a receive that was actually posted, so a failed start can be retried */ + } + return rc; +} + +/* Cancel the RMAPS receive if one is outstanding; the flag is cleared whether or not the cancel succeeds. */ +int orte_rmaps_base_comm_stop(void) +{ + int rc; + + if (!recv_issued) { + return ORTE_SUCCESS; + } + + if (ORTE_SUCCESS != (rc = orte_rml.recv_cancel(ORTE_RML_NAME_ANY, ORTE_RML_TAG_RMAPS))) { + ORTE_ERROR_LOG(rc); + } + recv_issued = false; + + return rc; +} + + + +/* + * handle message from proxies + * NOTE: The incoming buffer "buffer" is OBJ_RELEASED by the calling program. 
+ * DO NOT RELEASE THIS BUFFER IN THIS CODE + */ + +void orte_rmaps_base_recv(int status, orte_process_name_t* sender, + orte_buffer_t* buffer, orte_rml_tag_t tag, + void* cbdata) +{ + orte_buffer_t answer; + orte_rmaps_cmd_flag_t command; + orte_std_cntr_t count; + orte_jobid_t job; + char *desired_mapper; + int rc; + + /* get the command - "answer" has not been constructed yet, so a bare return is safe here */ + count = 1; + if (ORTE_SUCCESS != (rc = orte_dss.unpack(buffer, &command, &count, ORTE_RMAPS_CMD))) { + ORTE_ERROR_LOG(rc); + return; + } + + /* setup to return an answer */ + OBJ_CONSTRUCT(&answer, orte_buffer_t); + + /* pack the command in the answer - this is done to allow the caller to check + * that we are talking about the same command + */ + if (ORTE_SUCCESS != (rc = orte_dss.pack(&answer, &command, 1, ORTE_RMAPS_CMD))) { + ORTE_ERROR_LOG(rc); + goto CLEANUP; + } + + switch (command) { + case ORTE_RMAPS_MAP_CMD: + /* get the jobid */ + count = 1; + if (ORTE_SUCCESS != (rc = orte_dss.unpack(buffer, &job, &count, ORTE_JOBID))) { + ORTE_ERROR_LOG(rc); + goto SEND_ANSWER; + } + + /* get any desired mapper - NOTE(review): the unpacked string is never released in this function; confirm who owns it */ + count = 1; + if (ORTE_SUCCESS != (rc = orte_dss.unpack(buffer, &desired_mapper, &count, ORTE_STRING))) { + ORTE_ERROR_LOG(rc); + goto SEND_ANSWER; + } + + /* process the request */ + if (ORTE_SUCCESS != (rc = orte_rmaps.map_job(job, desired_mapper))) { + ORTE_ERROR_LOG(rc); + goto SEND_ANSWER; + } + break; + + default: + ORTE_ERROR_LOG(ORTE_ERR_VALUE_OUT_OF_BOUNDS); + } + +SEND_ANSWER: + if (0 > orte_rml.send_buffer(sender, &answer, tag, 0)) { + ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); + } +CLEANUP: + /* cleanup - every path that constructed "answer" must pass through here */ + OBJ_DESTRUCT(&answer); +} + diff --git a/orte/mca/rmaps/base/rmaps_private.h b/orte/mca/rmaps/base/rmaps_private.h new file mode 100644 index 0000000000..1aa0db0b23 --- /dev/null +++ b/orte/mca/rmaps/base/rmaps_private.h @@ -0,0 +1,127 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. 
+ * Copyright (c) 2004-2005 The University of Tennessee and The University
+ * of Tennessee Research Foundation. All rights
+ * reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ * University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ * All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+/** @file:
+ *
+ * Declarations private to the RMAPS framework: the command flag used
+ * on the RMAPS message tag, the component/module/priority tuple, and
+ * the base support functions shared by RMAPS components.
+ */
+
+#ifndef ORTE_MCA_RMAPS_PRIVATE_H
+#define ORTE_MCA_RMAPS_PRIVATE_H
+
+/*
+ * includes
+ */
+#include "orte_config.h"
+#include "orte/orte_constants.h"
+
+#include "orte/mca/ns/ns_types.h"
+#include "orte/mca/gpr/gpr_types.h"
+#include "orte/mca/rml/rml_types.h"
+
+#include "orte/mca/rmaps/rmaps.h"
+
+/*
+ * Functions for use solely within the RMAPS framework
+ */
+#if defined(c_plusplus) || defined(__cplusplus)
+extern "C" {
+#endif
+
+/* Define the RMAPS command flag and the DSS type used to pack it */
+typedef uint8_t orte_rmaps_cmd_flag_t;
+#define ORTE_RMAPS_CMD ORTE_UINT8
+
+/* define some commands */
+#define ORTE_RMAPS_MAP_CMD 0x01
+
+/*
+ * RMAPS component/module/priority tuple
+ */
+struct orte_rmaps_base_cmp_t {
+    /** Base object */
+    opal_list_item_t super;
+    /** rmaps component */
+    orte_rmaps_base_component_t *component;
+    /** rmaps module */
+    orte_rmaps_base_module_t* module;
+    /** This component's priority */
+    int priority;
+};
+/* Convenience typedef */
+typedef struct orte_rmaps_base_cmp_t orte_rmaps_base_cmp_t;
+/* Class declaration */
+ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_rmaps_base_cmp_t);
+
+
+/*
+ * Base functions
+ */
+
+ORTE_DECLSPEC int orte_rmaps_base_map(orte_jobid_t job, char *desired_mapper);
+
+/*
+ * NO_OP functions
+ */
+ORTE_DECLSPEC int orte_rmaps_base_map_no_op(orte_jobid_t job, char *desired_mapper);
+
+/*
+ * communication functions
+ *
+ * comm_start/comm_stop post and cancel the receive on the RMAPS
+ * message tag; orte_rmaps_base_recv is the callback that services
+ * each incoming request.
+ */
+int orte_rmaps_base_comm_start(void);
+int orte_rmaps_base_comm_stop(void);
+void orte_rmaps_base_recv(int status, orte_process_name_t* sender,
+                          orte_buffer_t* buffer, orte_rml_tag_t tag,
+                          void* cbdata);
+
+/*
+ * Internal support functions
+ */
+int orte_rmaps_base_mapped_node_query(opal_list_t* mapping_list, opal_list_t* nodes_alloc, orte_jobid_t jobid);
+int orte_rmaps_base_get_map(orte_jobid_t, opal_list_t* mapping);
+int orte_rmaps_base_set_map(orte_jobid_t, opal_list_t* mapping);
+int orte_rmaps_base_get_node_map(orte_cellid_t, orte_jobid_t, const char*, opal_list_t* mapping);
+
+int orte_rmaps_base_get_target_nodes(opal_list_t* node_list, orte_jobid_t jobid, orte_std_cntr_t *total_num_slots);
+int orte_rmaps_base_update_node_usage(opal_list_t *nodes);
+int orte_rmaps_base_get_mapped_targets(opal_list_t *mapped_node_list,
+                                       orte_app_context_t *app,
+                                       opal_list_t *master_node_list,
+                                       orte_std_cntr_t *total_num_slots);
+
+int orte_rmaps_base_claim_slot(orte_rmaps_base_map_t *map,
+                               orte_ras_node_t *current_node,
+                               orte_jobid_t jobid, orte_vpid_t vpid,
+                               int proc_index,
+                               opal_list_t *nodes,
+                               opal_list_t *fully_used_nodes);
+
+int orte_rmaps_base_set_vpid_range(orte_jobid_t jobid, orte_vpid_t start, orte_vpid_t range);
+int orte_rmaps_base_get_vpid_range(orte_jobid_t jobid, orte_vpid_t *start, orte_vpid_t *range);
+
+/*
+ * external API functions will be documented in the mca/rmaps/rmaps.h file
+ */
+
+#if defined(c_plusplus) || defined(__cplusplus)
+}
+#endif
+#endif
diff --git a/orte/mca/rmaps/proxy/Makefile.am b/orte/mca/rmaps/proxy/Makefile.am
new file mode 100644
index 0000000000..dd426bd8a6
--- /dev/null
+++ b/orte/mca/rmaps/proxy/Makefile.am
@@ -0,0 +1,46 @@
+#
+# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+# University Research and Technology
+# Corporation. All rights reserved.
+# Copyright (c) 2004-2005 The University of Tennessee and The University
+# of Tennessee Research Foundation.
All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +sources = \ + rmaps_proxy.h \ + rmaps_proxy_component.c \ + rmaps_proxy.c + +# Make the output library in this directory, and name it either +# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la +# (for static builds). + +if OMPI_BUILD_rmaps_proxy_DSO +component_noinst = +component_install = mca_rmaps_proxy.la +else +component_noinst = libmca_rmaps_proxy.la +component_install = +endif + +mcacomponentdir = $(libdir)/openmpi +mcacomponent_LTLIBRARIES = $(component_install) +mca_rmaps_proxy_la_SOURCES = $(sources) +mca_rmaps_proxy_la_LDFLAGS = -module -avoid-version +mca_rmaps_proxy_la_LIBADD = \ + $(top_ompi_builddir)/orte/liborte.la \ + $(top_ompi_builddir)/opal/libopal.la + +noinst_LTLIBRARIES = $(component_noinst) +libmca_rmaps_proxy_la_SOURCES =$(sources) +libmca_rmaps_proxy_la_LDFLAGS = -module -avoid-version diff --git a/orte/mca/rmaps/proxy/configure.params b/orte/mca/rmaps/proxy/configure.params new file mode 100644 index 0000000000..ee3e56ea34 --- /dev/null +++ b/orte/mca/rmaps/proxy/configure.params @@ -0,0 +1,23 @@ +# -*- shell-script -*- +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved.
+# $COPYRIGHT$
+#
+# Additional copyrights may follow
+#
+# $HEADER$
+#
+
+# Specific to this module
+
+PARAM_INIT_FILE=rmaps_proxy_component.c
+PARAM_CONFIG_FILES="Makefile"
diff --git a/orte/mca/rmaps/proxy/rmaps_proxy.c b/orte/mca/rmaps/proxy/rmaps_proxy.c
new file mode 100644
index 0000000000..08ceef3af8
--- /dev/null
+++ b/orte/mca/rmaps/proxy/rmaps_proxy.c
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ * University Research and Technology
+ * Corporation. All rights reserved.
+ * Copyright (c) 2004-2005 The University of Tennessee and The University
+ * of Tennessee Research Foundation. All rights
+ * reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ * University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ * All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+
+
+#include "orte_config.h"
+#include "orte/orte_constants.h"
+
+#include "orte/dss/dss.h"
+#include "orte/runtime/runtime.h"
+#include "orte/mca/ns/ns_types.h"
+#include "orte/mca/gpr/gpr_types.h"
+#include "orte/mca/rml/rml.h"
+#include "orte/mca/errmgr/errmgr.h"
+
+#include "orte/mca/rmaps/base/rmaps_private.h"
+#include "orte/mca/rmaps/proxy/rmaps_proxy.h"
+
+/*
+ * Map a job by forwarding the request to the replica.
+ *
+ * The request carries the map command, the jobid and the name of the
+ * desired mapper. The call then blocks until the replica answers and
+ * succeeds if the answer echoes the map command.
+ */
+int orte_rmaps_proxy_map(orte_jobid_t job, char *desired_mapper)
+{
+    orte_rmaps_cmd_flag_t cmd_flag = ORTE_RMAPS_MAP_CMD;
+    orte_buffer_t *request;
+    orte_buffer_t *reply;
+    orte_std_cntr_t num_vals;
+    int ret;
+
+    request = OBJ_NEW(orte_buffer_t);
+    if (NULL == request) {
+        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
+        return ORTE_ERR_OUT_OF_RESOURCE;
+    }
+
+    /* pack, in order: the command, the jobid, and the desired mapper.
+     * The DSS can handle NULL strings, so the mapper name is packed
+     * without checking it. Packing stops at the first failure.
+     */
+    if (ORTE_SUCCESS != (ret = orte_dss.pack(request, &cmd_flag, 1, ORTE_RMAPS_CMD)) ||
+        ORTE_SUCCESS != (ret = orte_dss.pack(request, &job, 1, ORTE_JOBID)) ||
+        ORTE_SUCCESS != (ret = orte_dss.pack(request, &desired_mapper, 1, ORTE_STRING))) {
+        ORTE_ERROR_LOG(ret);
+        OBJ_RELEASE(request);
+        return ret;
+    }
+
+    /* ship the request off to the replica */
+    if (0 > orte_rml.send_buffer(orte_rmaps_proxy_globals.replica, request, ORTE_RML_TAG_RMAPS, 0)) {
+        ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
+        OBJ_RELEASE(request);
+        return ORTE_ERR_COMM_FAILURE;
+    }
+    OBJ_RELEASE(request);
+
+    /* get a buffer to hold the reply */
+    reply = OBJ_NEW(orte_buffer_t);
+    if (NULL == reply) {
+        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
+        return ORTE_ERR_OUT_OF_RESOURCE;
+    }
+
+    /* block until the replica responds */
+    if (0 > orte_rml.recv_buffer(orte_rmaps_proxy_globals.replica, reply, ORTE_RML_TAG_RMAPS)) {
+        ret = ORTE_ERR_COMM_FAILURE;
+        ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
+        goto DONE;
+    }
+
+    num_vals = 1;
+    if (ORTE_SUCCESS != (ret = orte_dss.unpack(reply, &cmd_flag, &num_vals, ORTE_RMAPS_CMD))) {
+        ORTE_ERROR_LOG(ret);
+        goto DONE;
+    }
+
+    /* the reply must echo the command we sent */
+    if (ORTE_RMAPS_MAP_CMD != cmd_flag) {
+        ret = ORTE_ERR_COMM_FAILURE;
+        ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
+        goto DONE;
+    }
+
+    ret = ORTE_SUCCESS;
+
+DONE:
+    /* every path past the receive releases the reply buffer */
+    OBJ_RELEASE(reply);
+    return ret;
+}
diff --git a/orte/mca/rmaps/proxy/rmaps_proxy.h b/orte/mca/rmaps/proxy/rmaps_proxy.h
new file mode 100644
index 0000000000..09faa09b2a
--- /dev/null
+++ b/orte/mca/rmaps/proxy/rmaps_proxy.h
@@ -0,0 +1,70 @@
+/* -*- C -*-
+ *
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ * University Research and Technology
+ * Corporation. All rights reserved.
+ * Copyright (c) 2004-2005 The University of Tennessee and The University
+ * of Tennessee Research Foundation. All rights
+ * reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ * University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ * All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ *
+ */
+#ifndef ORTE_RMAPS_PROXY_H
+#define ORTE_RMAPS_PROXY_H
+
+
+#include "orte_config.h"
+#include "orte/orte_types.h"
+
+#include "orte/mca/ns/ns_types.h"
+
+#include "orte/mca/rmaps/rmaps.h"
+
+#if defined(c_plusplus) || defined(__cplusplus)
+extern "C" {
+#endif
+
+/*
+ * Module open / close
+ *
+ * open registers and reads the "debug" parameter of the rmaps proxy
+ * component; close has nothing to release. Both return ORTE_SUCCESS.
+ */
+int orte_rmaps_proxy_open(void);
+int orte_rmaps_proxy_close(void);
+
+
+/*
+ * Startup / Shutdown
+ *
+ * component_init returns NULL when this process is an HNP or an orted;
+ * otherwise it sets *priority and returns the proxy module.
+ */
+orte_rmaps_base_module_t*
+orte_rmaps_proxy_component_init(int *priority);
+
+int orte_rmaps_proxy_finalize(void);
+
+/*
+ * globals used within the component
+ */
+typedef struct {
+    /* non-zero when debug output was requested via the "debug" parameter */
+    int debug;
+    /* name of the process that map requests are sent to */
+    orte_process_name_t *replica;
+} orte_rmaps_proxy_globals_t;
+
+
+extern orte_rmaps_proxy_globals_t orte_rmaps_proxy_globals;
+
+/*
+ * Component API functions
+ *
+ * Sends a map request for the given job (and optional mapper name) to
+ * the replica and blocks until the answer arrives.
+ */
+int orte_rmaps_proxy_map(orte_jobid_t job, char *desired_mapper);
+
+#if defined(c_plusplus) || defined(__cplusplus)
+}
+#endif
+
+#endif
diff --git a/orte/mca/rmaps/proxy/rmaps_proxy_component.c b/orte/mca/rmaps/proxy/rmaps_proxy_component.c
new file mode 100644
index 0000000000..9e50cb0bea
--- /dev/null
+++ b/orte/mca/rmaps/proxy/rmaps_proxy_component.c
@@ -0,0 +1,151 @@
+/* -*- C -*-
+ *
+ * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+ * University Research and Technology
+ * Corporation. All rights reserved.
+ * Copyright (c) 2004-2005 The University of Tennessee and The University
+ * of Tennessee Research Foundation. All rights
+ * reserved.
+ * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+ * University of Stuttgart. All rights reserved.
+ * Copyright (c) 2004-2005 The Regents of the University of California.
+ * All rights reserved.
+ * $COPYRIGHT$
+ *
+ * Additional copyrights may follow
+ *
+ * $HEADER$
+ */
+/** @file:
+ *
+ * The RMAPS framework - Proxy component
+ *
+ * Selected on processes that are neither the HNP nor an orted; it
+ * forwards map requests to the replica instead of mapping locally.
+ */
+
+/*
+ * includes
+ */
+#include "orte_config.h"
+
+#include "orte/orte_constants.h"
+#include "orte/orte_types.h"
+
+#include "opal/util/output.h"
+#include "opal/mca/mca.h"
+#include "opal/mca/base/base.h"
+#include "opal/mca/base/mca_base_param.h"
+
+#include "orte/util/proc_info.h"
+#include "orte/mca/ns/ns_types.h"
+
+#include "orte/mca/rmaps/rmaps.h"
+#include "orte/mca/rmaps/base/rmaps_private.h"
+
+#include "rmaps_proxy.h"
+
+
+/*
+ * Struct of function pointers that need to be initialized
+ */
+orte_rmaps_base_component_t mca_rmaps_proxy_component = {
+    {
+        ORTE_RMAPS_BASE_VERSION_1_3_0,
+
+        "proxy", /* MCA module name */
+        ORTE_MAJOR_VERSION, /* MCA module major version */
+        ORTE_MINOR_VERSION, /* MCA module minor version */
+        ORTE_RELEASE_VERSION, /* MCA module release version */
+        orte_rmaps_proxy_open, /* module open */
+        orte_rmaps_proxy_close /* module close */
+    },
+    {
+        false /* checkpoint / restart */
+    },
+    orte_rmaps_proxy_component_init /* module init */
+};
+
+/*
+ * setup the function pointers for the module
+ */
+static orte_rmaps_base_module_t orte_rmaps_proxy = {
+    orte_rmaps_proxy_map,
+    orte_rmaps_proxy_finalize
+};
+
+
+/*
+ * Whether or not we allowed this component to be selected
+ */
+static bool initialized = false;
+
+/* local globals */
+orte_rmaps_proxy_globals_t orte_rmaps_proxy_globals;
+
+/*
+ * Open the component
+ *
+ * Registers the "debug" parameter (default 0) and records whether it
+ * was set. Always returns ORTE_SUCCESS.
+ */
+int orte_rmaps_proxy_open(void)
+{
+    int id;
+    /* start from the registered default so that a failed lookup, which
+     * would leave the value untouched, means "debug off" rather than
+     * reading an indeterminate value
+     */
+    int tmp = 0;
+
+    id = mca_base_param_register_int("rmaps", "proxy", "debug", NULL, 0);
+    mca_base_param_lookup_int(id, &tmp);
+    if (tmp) {
+        orte_rmaps_proxy_globals.debug = true;
+    } else {
+        orte_rmaps_proxy_globals.debug = false;
+    }
+
+    return ORTE_SUCCESS;
+}
+
+/*
+ * Close the component - nothing to release
+ */
+int orte_rmaps_proxy_close(void)
+{
+    return ORTE_SUCCESS;
+}
+
+/*
+ * Decide whether this component may be selected.
+ *
+ * Returns NULL (leaving *priority untouched) on an HNP or an orted;
+ * otherwise sets *priority, records the replica to talk to, and
+ * returns the proxy module.
+ */
+orte_rmaps_base_module_t*
+orte_rmaps_proxy_component_init(int *priority)
+{
+    if (orte_rmaps_proxy_globals.debug) {
+        opal_output(0, "rmaps_proxy_init called");
+    }
+
+    /* If we are an HNP or an orted, then don't pick us! */
+    if (orte_process_info.seed || orte_process_info.daemon) {
+        /* don't take me! */
+        return NULL;
+    }
+
+    /* Return a module (choose an arbitrary, positive priority --
+       it's only relevant compared to other components). */
+
+    *priority = 10;
+
+    /* define the replica for us to use - for now, just point
+     * to the name service replica
+     */
+    orte_rmaps_proxy_globals.replica = orte_process_info.ns_replica;
+
+    initialized = true;
+    return &orte_rmaps_proxy;
+}
+
+/*
+ * finalize routine
+ */
+int orte_rmaps_proxy_finalize(void)
+{
+    if (orte_rmaps_proxy_globals.debug) {
+        opal_output(0, "[%lu,%lu,%lu] rmaps_proxy_finalize called",
+                    ORTE_NAME_ARGS(orte_process_info.my_name));
+    }
+
+    initialized = false;
+
+    /* All done */
+    return ORTE_SUCCESS;
+}
diff --git a/orte/mca/rmaps/rmaps.h b/orte/mca/rmaps/rmaps.h
index a685a7ae66..07b76188bc 100644
--- a/orte/mca/rmaps/rmaps.h
+++ b/orte/mca/rmaps/rmaps.h
@@ -47,6 +47,7 @@
 #include "opal/mca/mca.h"
 #include "orte/mca/ns/ns_types.h"
+#include "orte/mca/rmaps/rmaps_types.h"
 /*
  * rmaps module functions
@@ -55,7 +56,7 @@
 /**
  * Mapping function
  */
-typedef int (*orte_rmaps_base_module_map_fn_t)(orte_jobid_t job);
+typedef int (*orte_rmaps_base_module_map_fn_t)(orte_jobid_t job, char *desired_mapper);
 /**
  * Cleanup module resources.
@@ -63,18 +64,18 @@ typedef int (*orte_rmaps_base_module_map_fn_t)(orte_jobid_t job); typedef int (*orte_rmaps_base_module_finalize_fn_t)(void); /* - * rmaps module version 1.0.0 + * rmaps module version 1.3.0 */ -struct orte_rmaps_base_module_1_0_0_t { +struct orte_rmaps_base_module_1_3_0_t { /** Maping function pointer */ - orte_rmaps_base_module_map_fn_t map; + orte_rmaps_base_module_map_fn_t map_job; /** Finalization function pointer */ - orte_rmaps_base_module_finalize_fn_t finalize; + orte_rmaps_base_module_finalize_fn_t finalize; }; /** Convenience typedef */ -typedef struct orte_rmaps_base_module_1_0_0_t orte_rmaps_base_module_1_0_0_t; +typedef struct orte_rmaps_base_module_1_3_0_t orte_rmaps_base_module_1_3_0_t; /** Convenience typedef */ -typedef orte_rmaps_base_module_1_0_0_t orte_rmaps_base_module_t; +typedef orte_rmaps_base_module_1_3_0_t orte_rmaps_base_module_t; /* @@ -89,9 +90,9 @@ typedef orte_rmaps_base_module_t* (*orte_rmaps_base_component_init_fn_t)( /** - * rmaps component version 1.0.0 + * rmaps component version 1.3.0 */ -struct orte_rmaps_base_component_1_0_0_t { +struct orte_rmaps_base_component_1_3_0_t { /** Base MCA structure */ mca_base_component_t rmaps_version; /** Base MCA data */ @@ -100,19 +101,23 @@ struct orte_rmaps_base_component_1_0_0_t { orte_rmaps_base_component_init_fn_t rmaps_init; }; /** Convenience typedef */ -typedef struct orte_rmaps_base_component_1_0_0_t orte_rmaps_base_component_1_0_0_t; +typedef struct orte_rmaps_base_component_1_3_0_t orte_rmaps_base_component_1_3_0_t; /** Convenience typedef */ -typedef orte_rmaps_base_component_1_0_0_t orte_rmaps_base_component_t; +typedef orte_rmaps_base_component_1_3_0_t orte_rmaps_base_component_t; /** * Macro for use in components that are of type rmaps v1.0.0 */ -#define ORTE_RMAPS_BASE_VERSION_1_0_0 \ - /* rmaps v1.0 is chained to MCA v1.0 */ \ +#define ORTE_RMAPS_BASE_VERSION_1_3_0 \ + /* rmaps v1.3 is chained to MCA v1.0 */ \ MCA_BASE_VERSION_1_0_0, \ - /* rmaps v1.0 */ \ 
- "rmaps", 1, 0, 0 + /* rmaps v1.3 */ \ + "rmaps", 1, 3, 0 + + +/* global structure for accessing RMAPS modules */ +ORTE_DECLSPEC extern orte_rmaps_base_module_t orte_rmaps; #endif diff --git a/orte/mca/rmaps/rmaps_types.h b/orte/mca/rmaps/rmaps_types.h index e69de29bb2..3c62940a5d 100644 --- a/orte/mca/rmaps/rmaps_types.h +++ b/orte/mca/rmaps/rmaps_types.h @@ -0,0 +1,92 @@ +/* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +/** @file: + */ + +#ifndef ORTE_MCA_RMAPS_TYPES_H +#define ORTE_MCA_RMAPS_TYPES_H + +#include "orte_config.h" +#include "orte/orte_constants.h" + +#include "orte/mca/ns/ns_types.h" +#include "orte/mca/gpr/gpr_types.h" +#include "orte/mca/ras/ras_types.h" +#include "orte/mca/rml/rml_types.h" + +#include "orte/mca/rmaps/rmaps.h" + +/* + * General MAP types + */ +#if defined(c_plusplus) || defined(__cplusplus) +extern "C" { +#endif + +/* + * Mapping of nodes to process ranks. + */ + +struct orte_rmaps_base_node_t { + opal_list_item_t super; + orte_ras_node_t* node; + opal_list_t node_procs; +}; +typedef struct orte_rmaps_base_node_t orte_rmaps_base_node_t; + +OBJ_CLASS_DECLARATION(orte_rmaps_base_node_t); + + +/* + * Mapping of a process rank to a specific node. 
+ */
+
+struct orte_rmaps_base_proc_t {
+    /* base list item (first member) */
+    opal_list_item_t super;
+    char *app; /* name of executable */
+    /* presumably the node this process is mapped to - TODO confirm */
+    orte_rmaps_base_node_t* proc_node;
+    orte_process_name_t proc_name;
+    orte_std_cntr_t proc_rank;
+    pid_t pid; /* PLS-assigned pid */
+    pid_t local_pid; /* pid found by local process */
+};
+typedef struct orte_rmaps_base_proc_t orte_rmaps_base_proc_t;
+
+OBJ_CLASS_DECLARATION(orte_rmaps_base_proc_t);
+
+
+/*
+ * Structure that represents the mapping of an application to an
+ * allocated set of resources.
+ */
+
+struct orte_rmaps_base_map_t {
+    /* base list item (first member) */
+    opal_list_item_t super;
+    orte_app_context_t *app;
+    /* array of process pointers; presumably num_procs entries long - TODO confirm */
+    orte_rmaps_base_proc_t** procs;
+    orte_std_cntr_t num_procs;
+    /* NOTE(review): looks like a list of orte_rmaps_base_node_t items - confirm */
+    opal_list_t nodes;
+};
+typedef struct orte_rmaps_base_map_t orte_rmaps_base_map_t;
+
+OBJ_CLASS_DECLARATION(orte_rmaps_base_map_t);
+
+
+#if defined(c_plusplus) || defined(__cplusplus)
+}
+#endif
+#endif
diff --git a/orte/mca/rmaps/round_robin/rmaps_rr.c b/orte/mca/rmaps/round_robin/rmaps_rr.c
index 2a70bc66bc..78a48e07b4 100644
--- a/orte/mca/rmaps/round_robin/rmaps_rr.c
+++ b/orte/mca/rmaps/round_robin/rmaps_rr.c
@@ -37,10 +37,9 @@
 #include "orte/mca/errmgr/errmgr.h"
 #include "orte/mca/ns/ns.h"
 #include "orte/mca/gpr/gpr.h"
-#include "orte/mca/rmaps/base/base.h"
-#include "orte/mca/rmgr/base/base.h"
-#include "orte/mca/rmaps/base/rmaps_base_map.h"
-#include "orte/mca/rmaps/base/rmaps_base_node.h"
+#include "orte/mca/rmgr/rmgr.h"
+
+#include "orte/mca/rmaps/base/rmaps_private.h"
 #include "rmaps_rr.h"
@@ -234,7 +233,7 @@ static int map_app_by_slot(
  * Create a round-robin mapping for the job.
*/ -static int orte_rmaps_rr_map(orte_jobid_t jobid) +static int orte_rmaps_rr_map(orte_jobid_t jobid, char *ignore) { orte_app_context_t** context, *app; orte_rmaps_base_map_t* map; @@ -249,7 +248,7 @@ static int orte_rmaps_rr_map(orte_jobid_t jobid) bool bynode = true, modify_app_context = false; /* query for the application context and allocated nodes */ - if(ORTE_SUCCESS != (rc = orte_rmgr_base_get_app_context(jobid, &context, &num_context))) { + if(ORTE_SUCCESS != (rc = orte_rmgr.get_app_context(jobid, &context, &num_context))) { ORTE_ERROR_LOG(rc); return rc; } @@ -473,7 +472,7 @@ static int orte_rmaps_rr_map(orte_jobid_t jobid) } /* save vpid start/range on the job segment */ - if (ORTE_SUCCESS != (rc = orte_rmaps_base_set_vpid_range(jobid, job_vpid_start, num_procs))) { + if (ORTE_SUCCESS != (rc = orte_rmgr.set_vpid_range(jobid, job_vpid_start, num_procs))) { ORTE_ERROR_LOG(rc); goto cleanup; } @@ -494,7 +493,7 @@ static int orte_rmaps_rr_map(orte_jobid_t jobid) processes */ if (modify_app_context) { - if (ORTE_SUCCESS != (rc = orte_rmgr_base_put_app_context(jobid, context, 1))) { + if (ORTE_SUCCESS != (rc = orte_rmgr.store_app_context(jobid, context, 1))) { ORTE_ERROR_LOG(rc); } } diff --git a/orte/mca/rmaps/round_robin/rmaps_rr_component.c b/orte/mca/rmaps/round_robin/rmaps_rr_component.c index 2ddb25b821..8b6a0ee3a1 100644 --- a/orte/mca/rmaps/round_robin/rmaps_rr_component.c +++ b/orte/mca/rmaps/round_robin/rmaps_rr_component.c @@ -22,6 +22,8 @@ #include "opal/mca/base/mca_base_param.h" #include "orte/util/proc_info.h" #include "opal/util/output.h" + +#include "orte/mca/rmaps/base/rmaps_private.h" #include "rmaps_rr.h" /* @@ -39,10 +41,10 @@ orte_rmaps_round_robin_component_t mca_rmaps_round_robin_component = { information about the component itself */ { - /* Indicate that we are a iof v1.0.0 component (which also + /* Indicate that we are a rmaps v1.3.0 component (which also implies a specific MCA version) */ - ORTE_RMAPS_BASE_VERSION_1_0_0, + 
ORTE_RMAPS_BASE_VERSION_1_3_0, "round_robin", /* MCA component name */ ORTE_MAJOR_VERSION, /* MCA component major version */ @@ -100,6 +102,18 @@ static int orte_rmaps_round_robin_open(void) static orte_rmaps_base_module_t* orte_rmaps_round_robin_init(int *priority) { + int rc; + + /* if I am NOT an HNP, then don't consider me! */ + if (!orte_process_info.seed) { + return NULL; + } + + /* start the receive function */ + if (ORTE_SUCCESS != (rc = orte_rmaps_base_comm_start())) { + return NULL; + } + *priority = mca_rmaps_round_robin_component.priority; return &orte_rmaps_round_robin_module; } diff --git a/orte/mca/rmgr/base/Makefile.am b/orte/mca/rmgr/base/Makefile.am index 2c293911d0..640171d733 100644 --- a/orte/mca/rmgr/base/Makefile.am +++ b/orte/mca/rmgr/base/Makefile.am @@ -17,17 +17,19 @@ # headers += \ + base/rmgr_private.h \ base/base.h libmca_rmgr_la_SOURCES += \ + base/rmgr_base_check_context.c \ base/rmgr_base_context.c \ base/rmgr_base_close.c \ base/rmgr_base_open.c \ - base/rmgr_base_pack.c \ - base/rmgr_base_unpack.c \ + base/rmgr_base_receive.c \ base/rmgr_base_select.c \ base/rmgr_base_stage_gate.c \ base/rmgr_base_stubs.c \ + base/rmgr_base_vpid_support_fns.c \ base/data_type_support/rmgr_data_type_compare_fns.c \ base/data_type_support/rmgr_data_type_copy_fns.c \ base/data_type_support/rmgr_data_type_print_fns.c \ diff --git a/orte/mca/rmgr/base/base.h b/orte/mca/rmgr/base/base.h index 071ed10e8a..005839a6db 100644 --- a/orte/mca/rmgr/base/base.h +++ b/orte/mca/rmgr/base/base.h @@ -42,10 +42,6 @@ extern "C" { #endif -/* - * Internal definitions - */ - /* * function definitions */ @@ -53,146 +49,6 @@ ORTE_DECLSPEC int orte_rmgr_base_open(void); ORTE_DECLSPEC int orte_rmgr_base_select(void); ORTE_DECLSPEC int orte_rmgr_base_close(void); -ORTE_DECLSPEC int orte_rmgr_base_get_app_context( - orte_jobid_t jobid, - orte_app_context_t*** app_context, - orte_std_cntr_t* num_context); - -ORTE_DECLSPEC int orte_rmgr_base_put_app_context( - orte_jobid_t 
jobid, - orte_app_context_t** app_context, - orte_std_cntr_t num_context); - -ORTE_DECLSPEC int orte_rmgr_base_get_job_slots( - orte_jobid_t jobid, - orte_std_cntr_t* num_slots); - -ORTE_DECLSPEC int orte_rmgr_base_set_job_slots( - orte_jobid_t jobid, - orte_std_cntr_t num_slots); - - -/* - * Pack/unpack - */ - -ORTE_DECLSPEC int orte_rmgr_base_pack_cmd( - orte_buffer_t* buffer, - orte_rmgr_cmd_t cmd, - orte_jobid_t jobid); - -ORTE_DECLSPEC int orte_rmgr_base_pack_create_cmd( - orte_buffer_t* buffer, - orte_app_context_t** context, - orte_std_cntr_t num_context); - -ORTE_DECLSPEC int orte_rmgr_base_pack_terminate_proc_cmd( - orte_buffer_t* buffer, - const orte_process_name_t* name); - -ORTE_DECLSPEC int orte_rmgr_base_pack_signal_job_cmd( - orte_buffer_t* buffer, - orte_jobid_t job, - int32_t signal); - -ORTE_DECLSPEC int orte_rmgr_base_pack_signal_proc_cmd( - orte_buffer_t* buffer, - const orte_process_name_t* name, - int32_t signal); - -ORTE_DECLSPEC int orte_rmgr_base_unpack_rsp( - orte_buffer_t* buffer); - -ORTE_DECLSPEC int orte_rmgr_base_unpack_create_rsp( - orte_buffer_t* buffer, - orte_jobid_t*); - -ORTE_DECLSPEC int orte_rmgr_base_cmd_dispatch( - orte_buffer_t* req, - orte_buffer_t* rsp); - -/* - * Base functions that are common to all implementations - can be overridden - */ -int orte_rmgr_base_create_not_available( - orte_app_context_t** app_context, - orte_std_cntr_t num_context, - orte_jobid_t* jobid); -int orte_rmgr_base_query_not_available(void); -int orte_rmgr_base_allocate_not_available(orte_jobid_t); -int orte_rmgr_base_deallocate_not_available(orte_jobid_t); -int orte_rmgr_base_map_not_available(orte_jobid_t); -int orte_rmgr_base_launch_not_available(orte_jobid_t); -int orte_rmgr_base_terminate_job_not_available(orte_jobid_t); -int orte_rmgr_base_terminate_proc_not_available(const orte_process_name_t*); -int orte_rmgr_base_signal_job_not_available(orte_jobid_t, int32_t); -int orte_rmgr_base_signal_proc_not_available(const orte_process_name_t*, 
int32_t); -ORTE_DECLSPEC int orte_rmgr_base_proc_stage_gate_init(orte_jobid_t job); -ORTE_DECLSPEC int orte_rmgr_base_proc_stage_gate_subscribe(orte_jobid_t job, orte_gpr_notify_cb_fn_t, void*, orte_proc_state_t); -ORTE_DECLSPEC int orte_rmgr_base_proc_stage_gate_mgr( - orte_gpr_notify_message_t *msg); -ORTE_DECLSPEC int orte_rmgr_base_proc_stage_gate_mgr_abort( - orte_gpr_notify_message_t *msg); -int orte_rmgr_base_spawn_not_available( - orte_app_context_t** app_context, - orte_std_cntr_t num_context, - orte_jobid_t* jobid, - orte_rmgr_cb_fn_t cbfn, - orte_proc_state_t cb_conditions); -int orte_rmgr_base_finalize_not_available(void); - -/* - * DATA TYPE PACKING FUNCTIONS - */ -int orte_rmgr_base_pack_app_context(orte_buffer_t *buffer, void *src, - orte_std_cntr_t num_vals, orte_data_type_t type); - -int orte_rmgr_base_pack_app_context_map(orte_buffer_t *buffer, void *src, - orte_std_cntr_t num_vals, orte_data_type_t type); - -/* - * DATA TYPE UNPACKING FUNCTIONS - */ -int orte_rmgr_base_unpack_app_context(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t *num_vals, orte_data_type_t type); - -int orte_rmgr_base_unpack_app_context_map(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t *num_vals, orte_data_type_t type); - - -/* - * COMPARE FUNCTIONS - */ -int orte_rmgr_base_compare_app_context(orte_app_context_t *value1, orte_app_context_t *value2, orte_data_type_t type); - -int orte_rmgr_base_compare_app_context_map(orte_app_context_map_t *value1, orte_app_context_map_t *value2, orte_data_type_t type); - -/* - * COPY FUNCTIONS - */ -int orte_rmgr_base_copy_app_context(orte_app_context_t **dest, orte_app_context_t *src, orte_data_type_t type); - -int orte_rmgr_base_copy_app_context_map(orte_app_context_map_t **dest, orte_app_context_map_t *src, orte_data_type_t type); - -/* - * PRINT FUNCTIONS - */ -int orte_rmgr_base_print_app_context(char **output, char *prefix, orte_app_context_t *src, orte_data_type_t type); - -int orte_rmgr_base_print_app_context_map(char 
**output, char *prefix, orte_app_context_map_t *src, orte_data_type_t type); - -/* - * SIZE FUNCTIONS - */ -int orte_rmgr_base_size_app_context(size_t *size, orte_app_context_t *src, orte_data_type_t type); - -int orte_rmgr_base_size_app_context_map(size_t *size, orte_app_context_map_t *src, orte_data_type_t type); - -/* - * RELEASE FUNCTIONS - */ -void orte_rmgr_base_std_obj_release(orte_data_value_t *value); - /* * globals that might be needed */ diff --git a/orte/mca/rmgr/base/data_type_support/rmgr_data_type_compare_fns.c b/orte/mca/rmgr/base/data_type_support/rmgr_data_type_compare_fns.c index 40325260a4..e1043f755d 100755 --- a/orte/mca/rmgr/base/data_type_support/rmgr_data_type_compare_fns.c +++ b/orte/mca/rmgr/base/data_type_support/rmgr_data_type_compare_fns.c @@ -24,7 +24,7 @@ #include "orte/mca/errmgr/errmgr.h" #include "orte/dss/dss_internal.h" -#include "orte/mca/rmgr/base/base.h" +#include "orte/mca/rmgr/base/rmgr_private.h" /* * APP CONTEXT diff --git a/orte/mca/rmgr/base/data_type_support/rmgr_data_type_copy_fns.c b/orte/mca/rmgr/base/data_type_support/rmgr_data_type_copy_fns.c index 80ae590442..b2e5dd4e90 100755 --- a/orte/mca/rmgr/base/data_type_support/rmgr_data_type_copy_fns.c +++ b/orte/mca/rmgr/base/data_type_support/rmgr_data_type_copy_fns.c @@ -28,7 +28,7 @@ #include "orte/mca/errmgr/errmgr.h" #include "orte/dss/dss_internal.h" -#include "orte/mca/rmgr/base/base.h" +#include "orte/mca/rmgr/base/rmgr_private.h" /* * APP CONTEXT diff --git a/orte/mca/rmgr/base/data_type_support/rmgr_data_type_packing_fns.c b/orte/mca/rmgr/base/data_type_support/rmgr_data_type_packing_fns.c index 2213a00d29..54fc394e6e 100644 --- a/orte/mca/rmgr/base/data_type_support/rmgr_data_type_packing_fns.c +++ b/orte/mca/rmgr/base/data_type_support/rmgr_data_type_packing_fns.c @@ -28,7 +28,7 @@ #include "orte/mca/errmgr/errmgr.h" #include "orte/dss/dss_internal.h" -#include "orte/mca/rmgr/base/base.h" +#include "orte/mca/rmgr/base/rmgr_private.h" /* * APP CONTEXT diff 
--git a/orte/mca/rmgr/base/data_type_support/rmgr_data_type_print_fns.c b/orte/mca/rmgr/base/data_type_support/rmgr_data_type_print_fns.c index a2c0981e58..2e320f9e92 100755 --- a/orte/mca/rmgr/base/data_type_support/rmgr_data_type_print_fns.c +++ b/orte/mca/rmgr/base/data_type_support/rmgr_data_type_print_fns.c @@ -28,7 +28,7 @@ #include "orte/mca/errmgr/errmgr.h" #include "orte/dss/dss_internal.h" -#include "orte/mca/rmgr/base/base.h" +#include "orte/mca/rmgr/base/rmgr_private.h" /* * APP CONTEXT diff --git a/orte/mca/rmgr/base/data_type_support/rmgr_data_type_release_fns.c b/orte/mca/rmgr/base/data_type_support/rmgr_data_type_release_fns.c index 1736800c3e..0ea04377bb 100755 --- a/orte/mca/rmgr/base/data_type_support/rmgr_data_type_release_fns.c +++ b/orte/mca/rmgr/base/data_type_support/rmgr_data_type_release_fns.c @@ -24,7 +24,7 @@ #include "orte/mca/errmgr/errmgr.h" #include "orte/dss/dss_internal.h" -#include "orte/mca/rmgr/base/base.h" +#include "orte/mca/rmgr/base/rmgr_private.h" /* * STANDARD OBJECT RELEASE diff --git a/orte/mca/rmgr/base/data_type_support/rmgr_data_type_size_fns.c b/orte/mca/rmgr/base/data_type_support/rmgr_data_type_size_fns.c index 1726630bb8..1e975b662b 100755 --- a/orte/mca/rmgr/base/data_type_support/rmgr_data_type_size_fns.c +++ b/orte/mca/rmgr/base/data_type_support/rmgr_data_type_size_fns.c @@ -26,7 +26,7 @@ #include "orte/mca/errmgr/errmgr.h" #include "orte/dss/dss_internal.h" -#include "orte/mca/rmgr/base/base.h" +#include "orte/mca/rmgr/base/rmgr_private.h" /* * APP CONTEXT diff --git a/orte/mca/rmgr/base/data_type_support/rmgr_data_type_unpacking_fns.c b/orte/mca/rmgr/base/data_type_support/rmgr_data_type_unpacking_fns.c index 237b0100c6..9d952f3045 100644 --- a/orte/mca/rmgr/base/data_type_support/rmgr_data_type_unpacking_fns.c +++ b/orte/mca/rmgr/base/data_type_support/rmgr_data_type_unpacking_fns.c @@ -28,7 +28,7 @@ #include "orte/mca/errmgr/errmgr.h" #include "orte/dss/dss_internal.h" -#include "orte/mca/rmgr/base/base.h" 
+#include "orte/mca/rmgr/base/rmgr_private.h" /* * APP_CONTEXT diff --git a/orte/mca/pls/base/pls_base_context.c b/orte/mca/rmgr/base/rmgr_base_check_context.c similarity index 95% rename from orte/mca/pls/base/pls_base_context.c rename to orte/mca/rmgr/base/rmgr_base_check_context.c index fdc1157af9..39aa5db1e2 100644 --- a/orte/mca/pls/base/pls_base_context.c +++ b/orte/mca/rmgr/base/rmgr_base_check_context.c @@ -12,6 +12,7 @@ */ #include "orte_config.h" +#include "orte/orte_constants.h" #include #include @@ -38,16 +39,17 @@ #include "opal/util/show_help.h" #include "opal/util/basename.h" #include "opal/util/path.h" -#include "orte/orte_constants.h" -#include "orte/mca/pls/base/base.h" + #include "orte/mca/errmgr/errmgr.h" +#include "orte/mca/rmgr/base/rmgr_private.h" + #if !defined(__WINDOWS__) extern char **environ; #endif /* !defined(__WINDOWS__) */ -int orte_pls_base_check_context_cwd(orte_app_context_t *context, - bool want_chdir) +int orte_rmgr_base_check_context_cwd(orte_app_context_t *context, + bool want_chdir) { bool good = true; char *tmp; @@ -115,7 +117,7 @@ int orte_pls_base_check_context_cwd(orte_app_context_t *context, return ORTE_SUCCESS; } -int orte_pls_base_check_context_app(orte_app_context_t *context) +int orte_rmgr_base_check_context_app(orte_app_context_t *context) { char *tmp; char hostname[MAXHOSTNAMELEN]; diff --git a/orte/mca/rmgr/base/rmgr_base_context.c b/orte/mca/rmgr/base/rmgr_base_context.c index ae2aab4226..1a0f475126 100644 --- a/orte/mca/rmgr/base/rmgr_base_context.c +++ b/orte/mca/rmgr/base/rmgr_base_context.c @@ -16,26 +16,21 @@ * $HEADER$ */ #include "orte_config.h" +#include "orte/orte_constants.h" + #include #ifdef HAVE_UNISTD_H #include #endif #include -#include "orte/orte_constants.h" - #include "opal/util/trace.h" -#include "orte/dss/dss.h" -#include "orte/mca/rmgr/base/base.h" -#include "orte/mca/rds/base/base.h" -#include "orte/mca/ras/base/base.h" -#include "orte/mca/rmaps/base/base.h" -#include 
"orte/mca/pls/base/base.h" #include "orte/mca/gpr/gpr.h" #include "orte/mca/ns/ns.h" #include "orte/mca/errmgr/errmgr.h" +#include "orte/mca/rmgr/base/rmgr_private.h" /* * Create the job segment and initialize the application context. diff --git a/orte/mca/rmgr/base/rmgr_base_open.c b/orte/mca/rmgr/base/rmgr_base_open.c index 26de803282..623d55618b 100644 --- a/orte/mca/rmgr/base/rmgr_base_open.c +++ b/orte/mca/rmgr/base/rmgr_base_open.c @@ -31,6 +31,7 @@ #include "opal/util/argv.h" #include "opal/util/trace.h" +#include "orte/mca/rmgr/base/rmgr_private.h" #include "orte/mca/rmgr/base/base.h" @@ -49,20 +50,18 @@ orte_rmgr_base_t orte_rmgr_base; orte_rmgr_base_module_t orte_rmgr = { - orte_rmgr_base_query_not_available, + NULL, orte_rmgr_base_create_not_available, - orte_rmgr_base_allocate_not_available, - orte_rmgr_base_deallocate_not_available, - orte_rmgr_base_map_not_available, - orte_rmgr_base_launch_not_available, - orte_rmgr_base_terminate_job_not_available, - orte_rmgr_base_terminate_proc_not_available, - orte_rmgr_base_signal_job_not_available, - orte_rmgr_base_signal_proc_not_available, orte_rmgr_base_spawn_not_available, - orte_rmgr_base_proc_stage_gate_init, - orte_rmgr_base_proc_stage_gate_mgr, - orte_rmgr_base_finalize_not_available + orte_rmgr_base_finalize_not_available, + /** SUPPORT FUNCTIONS ***/ + orte_rmgr_base_get_app_context, + orte_rmgr_base_put_app_context, + orte_rmgr_base_check_context_cwd, + orte_rmgr_base_check_context_app, + orte_rmgr_base_set_vpid_range, + orte_rmgr_base_get_vpid_range + }; /* @@ -172,7 +171,7 @@ int orte_rmgr_base_open(void) orte_rmgr_base.rmgr_output = -1; } - /* register the base system types with the DPS */ + /* register the base system types with the DSS */ tmp = ORTE_APP_CONTEXT; if (ORTE_SUCCESS != (rc = orte_dss.register_type(orte_rmgr_base_pack_app_context, orte_rmgr_base_unpack_app_context, diff --git a/orte/mca/rmgr/base/rmgr_base_pack.c b/orte/mca/rmgr/base/rmgr_base_pack.c deleted file mode 100644 index 
4aa014e4a3..0000000000 --- a/orte/mca/rmgr/base/rmgr_base_pack.c +++ /dev/null @@ -1,218 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -#include "orte_config.h" -#include -#ifdef HAVE_UNISTD_H -#include -#endif -#include - -#include "opal/util/trace.h" - -#include "orte/orte_constants.h" -#include "orte/dss/dss.h" -#include "orte/mca/rmgr/base/base.h" -#include "orte/mca/errmgr/errmgr.h" - - -/* - * - */ - -int orte_rmgr_base_pack_cmd(orte_buffer_t* buffer, orte_rmgr_cmd_t cmd, orte_jobid_t jobid) -{ - int rc; - - OPAL_TRACE(4); - - rc = orte_dss.pack(buffer, &cmd, 1, ORTE_RMGR_CMD); - if(ORTE_SUCCESS != rc) { - ORTE_ERROR_LOG(rc); - return rc; - } - - rc = orte_dss.pack(buffer, &jobid, 1, ORTE_JOBID); - if(ORTE_SUCCESS != rc) { - ORTE_ERROR_LOG(rc); - return rc; - } - return ORTE_SUCCESS; -} - -/* - * - */ - -int orte_rmgr_base_pack_create_cmd( - orte_buffer_t* buffer, - orte_app_context_t** context, - orte_std_cntr_t num_context) -{ - int rc; - - orte_rmgr_cmd_t cmd = ORTE_RMGR_CMD_CREATE; - - OPAL_TRACE(4); - - rc = orte_dss.pack(buffer, &cmd, 1, ORTE_RMGR_CMD); - if(ORTE_SUCCESS != rc) { - ORTE_ERROR_LOG(rc); - return rc; - } - - rc = orte_dss.pack(buffer, &num_context, 1, ORTE_STD_CNTR); - if(ORTE_SUCCESS != rc) { - ORTE_ERROR_LOG(rc); - return rc; - } - rc = orte_dss.pack(buffer, context, num_context, ORTE_APP_CONTEXT); - if(ORTE_SUCCESS != rc) { - ORTE_ERROR_LOG(rc); - return rc; - } - return ORTE_SUCCESS; -} - - -int orte_rmgr_base_pack_terminate_proc_cmd( - orte_buffer_t* buffer, - const 
orte_process_name_t* name) -{ - int rc; - - orte_rmgr_cmd_t cmd = ORTE_RMGR_CMD_TERM_PROC; - - OPAL_TRACE(4); - - rc = orte_dss.pack(buffer, &cmd, 1, ORTE_RMGR_CMD); - if(ORTE_SUCCESS != rc) { - ORTE_ERROR_LOG(rc); - return rc; - } - - rc = orte_dss.pack(buffer, (void*)name, 1, ORTE_NAME); - if(ORTE_SUCCESS != rc) { - ORTE_ERROR_LOG(rc); - return rc; - } - return ORTE_SUCCESS; -} - - -int orte_rmgr_base_pack_signal_job_cmd( - orte_buffer_t* buffer, - orte_jobid_t job, - int32_t signal) -{ - int rc; - - orte_rmgr_cmd_t cmd = ORTE_RMGR_CMD_SIGNAL_JOB; - - OPAL_TRACE(4); - - rc = orte_dss.pack(buffer, &cmd, 1, ORTE_RMGR_CMD); - if(ORTE_SUCCESS != rc) { - ORTE_ERROR_LOG(rc); - return rc; - } - - rc = orte_dss.pack(buffer, &job, 1, ORTE_JOBID); - if(ORTE_SUCCESS != rc) { - ORTE_ERROR_LOG(rc); - return rc; - } - - rc = orte_dss.pack(buffer, (void*)&signal, 1, ORTE_INT32); - if(ORTE_SUCCESS != rc) { - ORTE_ERROR_LOG(rc); - return rc; - } - - return ORTE_SUCCESS; -} - - -int orte_rmgr_base_pack_signal_proc_cmd( - orte_buffer_t* buffer, - const orte_process_name_t* name, - int32_t signal) -{ - int rc; - - orte_rmgr_cmd_t cmd = ORTE_RMGR_CMD_SIGNAL_PROC; - - OPAL_TRACE(4); - - rc = orte_dss.pack(buffer, &cmd, 1, ORTE_RMGR_CMD); - if(ORTE_SUCCESS != rc) { - ORTE_ERROR_LOG(rc); - return rc; - } - - rc = orte_dss.pack(buffer, (void*)name, 1, ORTE_NAME); - if(ORTE_SUCCESS != rc) { - ORTE_ERROR_LOG(rc); - return rc; - } - - rc = orte_dss.pack(buffer, (void*)&signal, 1, ORTE_INT32); - if(ORTE_SUCCESS != rc) { - ORTE_ERROR_LOG(rc); - return rc; - } - - return ORTE_SUCCESS; -} - - -int orte_rmgr_base_unpack_rsp( - orte_buffer_t* buffer) -{ - int32_t rc; - orte_std_cntr_t cnt = 1; - - OPAL_TRACE(4); - - if(ORTE_SUCCESS != (rc = orte_dss.unpack(buffer,&rc,&cnt,ORTE_INT32))) { - ORTE_ERROR_LOG(rc); - return rc; - } - return rc; -} - -int orte_rmgr_base_unpack_create_rsp( - orte_buffer_t* buffer, - orte_jobid_t* jobid) -{ - int32_t rc; - orte_std_cntr_t cnt; - - OPAL_TRACE(4); - - cnt = 
1; - if(ORTE_SUCCESS != (rc = orte_dss.unpack(buffer,jobid,&cnt,ORTE_JOBID))) { - ORTE_ERROR_LOG(rc); - return rc; - } - cnt = 1; - if(ORTE_SUCCESS != (rc = orte_dss.unpack(buffer,&rc,&cnt,ORTE_INT32))) { - ORTE_ERROR_LOG(rc); - return rc; - } - return rc; -} - - diff --git a/orte/mca/rmgr/base/rmgr_base_receive.c b/orte/mca/rmgr/base/rmgr_base_receive.c new file mode 100644 index 0000000000..b12e033980 --- /dev/null +++ b/orte/mca/rmgr/base/rmgr_base_receive.c @@ -0,0 +1,205 @@ +/* -*- C -*- + * + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +/** @file: + * + */ + +/* + * includes + */ +#include "orte_config.h" + +#include "orte/orte_constants.h" +#include "orte/orte_types.h" + +#include "opal/util/output.h" +#include "opal/mca/mca.h" +#include "opal/mca/base/mca_base_param.h" + +#include "orte/dss/dss.h" +#include "orte/util/proc_info.h" +#include "orte/mca/errmgr/errmgr.h" +#include "orte/mca/rmgr/rmgr.h" +#include "orte/mca/rml/rml.h" + +#include "orte/mca/rmgr/base/rmgr_private.h" + +static bool recv_issued=false; + +int orte_rmgr_base_comm_start(void) +{ + int rc; + + if (recv_issued) { + return ORTE_SUCCESS; + } + + if (ORTE_SUCCESS != (rc = orte_rml.recv_buffer_nb(ORTE_RML_NAME_ANY, + ORTE_RML_TAG_RMGR, + ORTE_RML_PERSISTENT, + orte_rmgr_base_recv, + NULL))) { + ORTE_ERROR_LOG(rc); + } + recv_issued = true; + + return rc; +} + +int orte_rmgr_base_comm_stop(void) +{ + int rc; + + if (!recv_issued) { + return ORTE_SUCCESS; + } + + if (ORTE_SUCCESS != (rc = orte_rml.recv_cancel(ORTE_RML_NAME_ANY, ORTE_RML_TAG_RMGR))) { + ORTE_ERROR_LOG(rc); + } + recv_issued = false; + + return rc; +} + + + +/* + * handle message from proxies + * NOTE: The incoming buffer "buffer" is OBJ_RELEASED by the calling program. 
+ * DO NOT RELEASE THIS BUFFER IN THIS CODE + */ + +void orte_rmgr_base_recv(int status, orte_process_name_t* sender, + orte_buffer_t* buffer, orte_rml_tag_t tag, + void* cbdata) +{ + orte_buffer_t answer; + orte_rmgr_cmd_t command; + orte_std_cntr_t count, num_context; + orte_jobid_t job; + orte_app_context_t **context; + int rc; + + /* get the command */ + count = 1; + if (ORTE_SUCCESS != (rc = orte_dss.unpack(buffer, &command, &count, ORTE_RMGR_CMD))) { + ORTE_ERROR_LOG(rc); + return; + } + + /* setup to return an answer */ + OBJ_CONSTRUCT(&answer, orte_buffer_t); + + /* pack the command in the answer - this is done to allow the caller to check + * that we are talking about the same command + */ + if (ORTE_SUCCESS != (rc = orte_dss.pack(&answer, &command, 1, ORTE_RMGR_CMD))) { + ORTE_ERROR_LOG(rc); + return; + } + + switch (command) { + case ORTE_RMGR_SETUP_JOB_CMD: + /* get the number of app_contexts */ + count = 1; + if(ORTE_SUCCESS != (rc = orte_dss.unpack(buffer, &num_context, &count, ORTE_STD_CNTR))) { + ORTE_ERROR_LOG(rc); + return; + } + + /* allocate space for them */ + if(NULL == (context = (orte_app_context_t**)malloc(sizeof(orte_app_context_t*)*num_context))) { + ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); + return; + } + + /* and unpack them */ + count = num_context; + if(ORTE_SUCCESS != (rc = orte_dss.unpack(buffer, context, &count, ORTE_APP_CONTEXT))) { + ORTE_ERROR_LOG(rc); + free(context); + return; + } + + /* process the request */ + if (ORTE_SUCCESS != (rc = orte_rmgr.setup_job(context, num_context, &job))) { + ORTE_ERROR_LOG(rc); + goto SEND_ANSWER; + } + + /* return the new jobid */ + if(ORTE_SUCCESS != (rc = orte_dss.pack(&answer, &job, 1, ORTE_JOBID))) { + ORTE_ERROR_LOG(rc); + goto SEND_ANSWER; + } + break; + + case ORTE_RMGR_SPAWN_JOB_CMD: + /* for proxy operations, we don't pack callback functions - the proxy + * component takes care of registering the subscription itself. 
+ * Hence, the only things we receive here are the app_context objects + */ + /* get the number of app_contexts */ + count = 1; + if(ORTE_SUCCESS != (rc = orte_dss.unpack(buffer, &num_context, &count, ORTE_STD_CNTR))) { + ORTE_ERROR_LOG(rc); + return; + } + + /* allocate space for them */ + if(NULL == (context = (orte_app_context_t**)malloc(sizeof(orte_app_context_t*)*num_context))) { + ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); + return; + } + + /* and unpack them */ + count = num_context; + if(ORTE_SUCCESS != (rc = orte_dss.unpack(buffer, context, &count, ORTE_APP_CONTEXT))) { + ORTE_ERROR_LOG(rc); + free(context); + return; + } + + /* process the request */ + if (ORTE_SUCCESS != (rc = orte_rmgr.spawn_job(context, num_context, &job, NULL, ORTE_PROC_STATE_NONE))) { + ORTE_ERROR_LOG(rc); + goto SEND_ANSWER; + } + + /* return the new jobid */ + if(ORTE_SUCCESS != (rc = orte_dss.pack(&answer, &job, 1, ORTE_JOBID))) { + ORTE_ERROR_LOG(rc); + goto SEND_ANSWER; + } + break; + + default: + ORTE_ERROR_LOG(ORTE_ERR_VALUE_OUT_OF_BOUNDS); + } + +SEND_ANSWER: + if (0 > orte_rml.send_buffer(sender, &answer, tag, 0)) { + ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); + } + + /* cleanup */ + OBJ_DESTRUCT(&answer); +} + diff --git a/orte/mca/rmgr/base/rmgr_base_select.c b/orte/mca/rmgr/base/rmgr_base_select.c index 4cc29c0588..d9387bddd8 100644 --- a/orte/mca/rmgr/base/rmgr_base_select.c +++ b/orte/mca/rmgr/base/rmgr_base_select.c @@ -88,6 +88,11 @@ int orte_rmgr_base_select(void) /* save the module for later usage */ orte_rmgr = *best_module; + + /* let the module do it's own init, if needed */ + if (NULL != orte_rmgr.module_init) { + orte_rmgr.module_init(); + } return ORTE_SUCCESS; } diff --git a/orte/mca/rmgr/base/rmgr_base_stage_gate.c b/orte/mca/rmgr/base/rmgr_base_stage_gate.c index dc95273f8b..baecee4aa0 100644 --- a/orte/mca/rmgr/base/rmgr_base_stage_gate.c +++ b/orte/mca/rmgr/base/rmgr_base_stage_gate.c @@ -38,160 +38,20 @@ #include "orte/mca/rml/rml.h" #include 
"orte/mca/smr/smr.h" -#include "orte/mca/rmgr/base/base.h" +#include "orte/mca/rmgr/base/rmgr_private.h" int orte_rmgr_base_proc_stage_gate_init(orte_jobid_t job) { - orte_std_cntr_t i, num_counters, num_named_trigs; - orte_std_cntr_t zero=0; int rc; - orte_gpr_value_t *value; - char* keys[] = { - /* changes to this ordering need to be reflected in code below */ - /* We need to set up counters for all the defined ORTE process states, even though - * the launch system doesn't actually use them all. This must be done so that - * user-defined callbacks can be generated - otherwise, they won't happen! - */ - ORTE_PROC_NUM_AT_INIT, - ORTE_PROC_NUM_LAUNCHED, - ORTE_PROC_NUM_RUNNING, - ORTE_PROC_NUM_AT_STG1, - ORTE_PROC_NUM_AT_STG2, - ORTE_PROC_NUM_AT_STG3, - ORTE_PROC_NUM_FINALIZED, - ORTE_PROC_NUM_TERMINATED, - ORTE_PROC_NUM_ABORTED - }; - char* trig_names[] = { - /* changes to this ordering need to be reflected in code below */ - ORTE_ALL_INIT_TRIGGER, - ORTE_ALL_LAUNCHED_TRIGGER, - ORTE_ALL_RUNNING_TRIGGER, - ORTE_STG1_TRIGGER, - ORTE_STG2_TRIGGER, - ORTE_STG3_TRIGGER, - ORTE_NUM_FINALIZED_TRIGGER, - ORTE_NUM_TERMINATED_TRIGGER - }; - char *segment, *trig_name, *tokens[2], *trig_keys[2]; - orte_gpr_trigger_id_t id; - orte_std_cntr_t trig_level; - OPAL_TRACE(1); - - num_counters = sizeof(keys)/sizeof(keys[0]); - num_named_trigs= sizeof(trig_names)/sizeof(trig_names[0]); - - if (ORTE_SUCCESS != (rc = orte_schema.get_job_segment_name(&segment, job))) { + /* init the stage gates */ + if (ORTE_SUCCESS != (rc = orte_smr.init_job_stage_gates(job, orte_rmgr_base_proc_stage_gate_mgr, NULL))) { ORTE_ERROR_LOG(rc); return rc; } - - /* setup the counters */ - if (ORTE_SUCCESS != (rc = orte_gpr.create_value(&value, - ORTE_GPR_OVERWRITE | ORTE_GPR_TOKENS_XAND | ORTE_GPR_KEYS_OR, - segment, num_counters, 1))) { - - ORTE_ERROR_LOG(rc); - return rc; - } - - value->tokens[0] = strdup(ORTE_JOB_GLOBALS); /* put counters in the job's globals container */ - - for (i=0; i < num_counters; 
i++) { - if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[i]), keys[i], ORTE_STD_CNTR, &zero))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(value); - return rc; - } - } - - /* put the counters on the registry */ - if (ORTE_SUCCESS != (rc = orte_gpr.put(1, &value))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(value); - return rc; - } - OBJ_RELEASE(value); - - /*** DEFINE STAGE GATE STANDARD TRIGGERS ***/ - /* The standard triggers will return the trigger counters so that we - * can get required information for notifying processes. Other - * subscriptions will then attach to them. - */ - tokens[0] = strdup(ORTE_JOB_GLOBALS); - tokens[1] = NULL; - - trig_keys[0] = strdup(ORTE_JOB_SLOTS_KEY); - for (i=0; i < num_named_trigs; i++) { - trig_keys[1] = strdup(keys[i]); - if (ORTE_SUCCESS != (rc = orte_schema.get_std_trigger_name(&trig_name, - trig_names[i], job))) { - ORTE_ERROR_LOG(rc); - free(tokens[0]); - free(segment); - free(trig_keys[0]); - free(trig_keys[1]); - return rc; - } - if (ORTE_SUCCESS != (rc = orte_gpr.define_trigger(&id, trig_name, - ORTE_GPR_TRIG_INCLUDE_TRIG_CNTRS | ORTE_GPR_TRIG_ONE_SHOT | - ORTE_GPR_TRIG_ROUTE_DATA_THRU_ME | ORTE_GPR_TRIG_CMP_LEVELS, - ORTE_GPR_TOKENS_XAND | ORTE_GPR_KEYS_OR, - segment, tokens, 2, trig_keys, - orte_rmgr_base_proc_stage_gate_mgr, NULL))) { - ORTE_ERROR_LOG(rc); - free(tokens[0]); - free(segment); - free(trig_name); - free(trig_keys[0]); - free(trig_keys[1]); - return rc; - } - free(trig_name); - free(trig_keys[1]); - } - free(trig_keys[0]); - - /* Now define the abort trigger. 
Again, only the trigger counter needs - * to be returned, so we don't need to setup a subscription to get - * other information - */ - trig_keys[0] = strdup(ORTE_PROC_NUM_ABORTED); - if (ORTE_SUCCESS != (rc = orte_schema.get_std_trigger_name(&trig_name, - ORTE_NUM_ABORTED_TRIGGER, job))) { - ORTE_ERROR_LOG(rc); - free(tokens[0]); - free(segment); - free(trig_keys[0]); - return rc; - } - trig_level = 1; - if (ORTE_SUCCESS != (rc = orte_gpr.define_trigger_level(&id, trig_name, - ORTE_GPR_TRIG_INCLUDE_TRIG_CNTRS | ORTE_GPR_TRIG_ONE_SHOT | - ORTE_GPR_TRIG_AT_LEVEL, - ORTE_GPR_TOKENS_XAND | ORTE_GPR_KEYS_OR, - segment, tokens, 1, trig_keys, &trig_level, - orte_rmgr_base_proc_stage_gate_mgr_abort, NULL))) { - ORTE_ERROR_LOG(rc); - free(tokens[0]); - free(segment); - free(trig_name); - free(trig_keys[0]); - return rc; - } - free(tokens[0]); - free(segment); - free(trig_name); - free(trig_keys[0]); - - /* set the job state to "launched" */ - if (ORTE_SUCCESS != (rc = orte_smr.set_job_state(job, ORTE_JOB_STATE_LAUNCHED))) { - ORTE_ERROR_LOG(rc); - } - - return rc; + + return ORTE_SUCCESS; } @@ -234,7 +94,12 @@ int orte_rmgr_base_proc_stage_gate_mgr(orte_gpr_notify_message_t *msg) } /* set the job state to the appropriate level */ - if (orte_schema.check_std_trigger_name(msg->target, ORTE_STG1_TRIGGER)) { + if (orte_schema.check_std_trigger_name(msg->target, ORTE_ALL_LAUNCHED_TRIGGER)) { + if (ORTE_SUCCESS != (rc = orte_smr.set_job_state(job, ORTE_JOB_STATE_LAUNCHED))) { + ORTE_ERROR_LOG(rc); + goto CLEANUP; + } + } else if (orte_schema.check_std_trigger_name(msg->target, ORTE_STG1_TRIGGER)) { if (ORTE_SUCCESS != (rc = orte_smr.set_job_state(job, ORTE_JOB_STATE_AT_STG1))) { ORTE_ERROR_LOG(rc); goto CLEANUP; @@ -281,119 +146,3 @@ CLEANUP: if (NULL != recipients) free(recipients); return rc; } - -int orte_rmgr_base_proc_stage_gate_mgr_abort(orte_gpr_notify_message_t *msg) -{ - orte_jobid_t job; - int rc; - - OPAL_TRACE(1); - - /* All stage gate triggers are named, so we can 
extract the jobid - * directly from the trigger name - */ - if (ORTE_SUCCESS != (rc = orte_schema.extract_jobid_from_std_trigger_name(&job, msg->target))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* set the job status to "aborted" */ - - if (ORTE_SUCCESS != (rc = orte_smr.set_job_state(job, ORTE_JOB_STATE_ABORTED))) { - ORTE_ERROR_LOG(rc); - } - - orte_errmgr.incomplete_start(job); - - return ORTE_SUCCESS; -} - - -/* - * Routine that tools such as orterun can use to subscribe - * to events on all counters. - */ - -int orte_rmgr_base_proc_stage_gate_subscribe(orte_jobid_t job, orte_gpr_notify_cb_fn_t cbfunc, void* cbdata, orte_proc_state_t cb_conditions) -{ - orte_std_cntr_t i; - int rc; - char *segment, *trig_name, *tokens[2]; - orte_gpr_subscription_id_t id; - /** the order of the next three definitions MUST match */ - orte_proc_state_t state[] = { - ORTE_PROC_STATE_INIT, - ORTE_PROC_STATE_LAUNCHED, - ORTE_PROC_STATE_RUNNING, - ORTE_PROC_STATE_AT_STG1, - ORTE_PROC_STATE_AT_STG2, - ORTE_PROC_STATE_AT_STG3, - ORTE_PROC_STATE_FINALIZED, - ORTE_PROC_STATE_TERMINATED, - ORTE_PROC_STATE_ABORTED - }; - char* keys[] = { - ORTE_PROC_NUM_AT_INIT, - ORTE_PROC_NUM_LAUNCHED, - ORTE_PROC_NUM_RUNNING, - ORTE_PROC_NUM_AT_STG1, - ORTE_PROC_NUM_AT_STG2, - ORTE_PROC_NUM_AT_STG3, - ORTE_PROC_NUM_FINALIZED, - ORTE_PROC_NUM_TERMINATED, - ORTE_PROC_NUM_ABORTED - }; - char* trig_names[] = { - ORTE_ALL_INIT_TRIGGER, - ORTE_ALL_LAUNCHED_TRIGGER, - ORTE_ALL_RUNNING_TRIGGER, - ORTE_STG1_TRIGGER, - ORTE_STG2_TRIGGER, - ORTE_STG3_TRIGGER, - ORTE_NUM_FINALIZED_TRIGGER, - ORTE_NUM_TERMINATED_TRIGGER, - ORTE_NUM_ABORTED_TRIGGER - }; - orte_std_cntr_t num_counters = sizeof(keys)/sizeof(keys[0]); - - OPAL_TRACE(1); - - /* identify the segment for this job */ - if (ORTE_SUCCESS != (rc = orte_schema.get_job_segment_name(&segment, job))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* setup the tokens */ - tokens[0]=ORTE_JOB_GLOBALS; - tokens[1]=NULL; - - for (i=0; i < num_counters; i++) { - if 
(state[i] & cb_conditions) { - /** want this one - attach ourselves to the appropriate standard trigger */ - if (ORTE_SUCCESS != - (rc = orte_schema.get_std_trigger_name(&trig_name, trig_names[i], job))) { - ORTE_ERROR_LOG(rc); - free(segment); - return rc; - } - - if (ORTE_SUCCESS != (rc = orte_gpr.subscribe_1(&id, trig_name, NULL, - ORTE_GPR_NOTIFY_DELETE_AFTER_TRIG, - ORTE_GPR_TOKENS_OR | ORTE_GPR_KEYS_OR, - segment, tokens, keys[i], - cbfunc, cbdata))) { - ORTE_ERROR_LOG(rc); - free(segment); - free(trig_name); - return rc; - } - free(trig_name); - } - } - free(segment); - - return ORTE_SUCCESS; -} - - diff --git a/orte/mca/rmgr/base/rmgr_base_stubs.c b/orte/mca/rmgr/base/rmgr_base_stubs.c index f07c4d8199..62412ebf02 100644 --- a/orte/mca/rmgr/base/rmgr_base_stubs.c +++ b/orte/mca/rmgr/base/rmgr_base_stubs.c @@ -23,7 +23,7 @@ #include "orte/orte_constants.h" #include "opal/mca/mca.h" -#include "orte/mca/rmgr/base/base.h" +#include "orte/mca/rmgr/base/rmgr_private.h" /* @@ -38,60 +38,6 @@ orte_rmgr_base_create_not_available( return ORTE_ERR_UNREACH; } -int -orte_rmgr_base_query_not_available(void) -{ - return ORTE_ERR_UNREACH; -} - -int -orte_rmgr_base_allocate_not_available(orte_jobid_t jobid) -{ - return ORTE_ERR_UNREACH; -} - -int -orte_rmgr_base_deallocate_not_available(orte_jobid_t jobid) -{ - return ORTE_ERR_UNREACH; -} - -int -orte_rmgr_base_map_not_available(orte_jobid_t jobid) -{ - return ORTE_ERR_UNREACH; -} - -int -orte_rmgr_base_launch_not_available(orte_jobid_t jobid) -{ - return ORTE_ERR_UNREACH; -} - -int -orte_rmgr_base_terminate_job_not_available(orte_jobid_t jobid) -{ - return ORTE_ERR_UNREACH; -} - -int -orte_rmgr_base_terminate_proc_not_available(const orte_process_name_t* proc_name) -{ - return ORTE_ERR_UNREACH; -} - -int -orte_rmgr_base_signal_job_not_available(orte_jobid_t jobid, int32_t signal) -{ - return ORTE_ERR_UNREACH; -} - -int -orte_rmgr_base_signal_proc_not_available(const orte_process_name_t* proc_name, int32_t signal) -{ - 
return ORTE_ERR_UNREACH; -} - int orte_rmgr_base_spawn_not_available( orte_app_context_t** app_context, diff --git a/orte/mca/rmgr/base/rmgr_base_unpack.c b/orte/mca/rmgr/base/rmgr_base_unpack.c deleted file mode 100644 index eaeb5e0f6e..0000000000 --- a/orte/mca/rmgr/base/rmgr_base_unpack.c +++ /dev/null @@ -1,283 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -#include "orte_config.h" -#include -#ifdef HAVE_UNISTD_H -#include -#endif -#include - -#include "orte/orte_constants.h" -#include "opal/util/output.h" -#include "opal/util/trace.h" - -#include "orte/dss/dss.h" -#include "orte/mca/rmgr/base/base.h" -#include "orte/mca/errmgr/errmgr.h" - - -/* - * - */ - -static int orte_rmgr_base_cmd_query(orte_buffer_t* req, orte_buffer_t* rsp) -{ - int32_t rc = orte_rmgr.query(); - - OPAL_TRACE(4); - - return orte_dss.pack(rsp, &rc, 1, ORTE_INT32); -} - -static int orte_rmgr_base_cmd_create(orte_buffer_t* req, orte_buffer_t* rsp) -{ - int rc; - int32_t ret; - orte_app_context_t** context; - orte_jobid_t jobid; - orte_std_cntr_t i, cnt, num_context; - - OPAL_TRACE(4); - - cnt = 1; - if(ORTE_SUCCESS != (rc = orte_dss.unpack(req, &num_context, &cnt, ORTE_STD_CNTR))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - if(NULL == (context = (orte_app_context_t**)malloc(sizeof(orte_app_context_t*)*num_context))) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - cnt = num_context; - if(ORTE_SUCCESS != (rc = orte_dss.unpack(req, context, &cnt, ORTE_APP_CONTEXT))) { - ORTE_ERROR_LOG(rc); - 
free(context); - return rc; - } - - ret = orte_rmgr.create(context, num_context, &jobid); - ret = orte_rmgr_base_proc_stage_gate_init(jobid); - - if(ORTE_SUCCESS != (rc = orte_dss.pack(rsp, &jobid, 1, ORTE_JOBID))) { - ORTE_ERROR_LOG(rc); - goto cleanup; - } - if(ORTE_SUCCESS != (rc = orte_dss.pack(rsp, &ret, 1, ORTE_INT32))) { - ORTE_ERROR_LOG(rc); - goto cleanup; - } - -cleanup: - for(i=0; i + +#include "opal/util/output.h" +#include "opal/class/opal_list.h" + +#include "orte/dss/dss.h" +#include "orte/mca/schema/schema.h" +#include "orte/mca/gpr/gpr.h" +#include "orte/mca/errmgr/errmgr.h" + +#include "orte/mca/rmgr/base/rmgr_private.h" + +/** + * Set the vpid start and range for a job/pset on the registry + */ + +int orte_rmgr_base_set_vpid_range(orte_jobid_t jobid, orte_vpid_t start, orte_vpid_t range) +{ + orte_gpr_value_t *value; + char *segment; + int rc; + + if(ORTE_SUCCESS != (rc = orte_schema.get_job_segment_name(&segment, jobid))) { + ORTE_ERROR_LOG(rc); + return rc; + } + + if (ORTE_SUCCESS != (rc = orte_gpr.create_value(&value, ORTE_GPR_OVERWRITE, segment, 2, 1))) { + ORTE_ERROR_LOG(rc); + free(segment); + return rc; + } + free(segment); + value->tokens[0] = strdup(ORTE_JOB_GLOBALS); + + if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[0]), ORTE_JOB_VPID_START_KEY, ORTE_VPID, &start))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(value); + return rc; + } + if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[1]), ORTE_JOB_VPID_RANGE_KEY, ORTE_VPID, &range))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(value); + return rc; + } + + rc = orte_gpr.put(1, &value); + if (ORTE_SUCCESS != rc) ORTE_ERROR_LOG(rc); + + OBJ_RELEASE(value); + return rc; +} + + +/** + * Get the vpid start and range for a job/pset from the registry + */ + +int orte_rmgr_base_get_vpid_range(orte_jobid_t jobid, orte_vpid_t *start, orte_vpid_t *range) +{ + char *segment; + char *tokens[2]; + char *keys[3]; + orte_gpr_value_t** values = NULL; + orte_std_cntr_t i, 
num_values = 0; + orte_vpid_t *vptr; + int rc; + + /* query the job segment on the registry */ + if(ORTE_SUCCESS != (rc = orte_schema.get_job_segment_name(&segment, jobid))) { + ORTE_ERROR_LOG(rc); + return rc; + } + + tokens[0] = ORTE_JOB_GLOBALS; + tokens[1] = NULL; + + keys[0] = ORTE_JOB_VPID_START_KEY; + keys[1] = ORTE_JOB_VPID_RANGE_KEY; + keys[2] = NULL; + + rc = orte_gpr.get( + ORTE_GPR_KEYS_AND|ORTE_GPR_TOKENS_OR, + segment, + tokens, + keys, + &num_values, + &values + ); + if(rc != ORTE_SUCCESS) { + free(segment); + ORTE_ERROR_LOG(rc); + return rc; + } + if(num_values != 1) { + rc = ORTE_ERR_NOT_FOUND; + ORTE_ERROR_LOG(rc); + goto cleanup; + } + + for(i=0; icnt; i++) { + if(strcmp(values[0]->keyvals[i]->key, ORTE_JOB_VPID_START_KEY) == 0) { + if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&vptr, values[0]->keyvals[i]->value, ORTE_VPID))) { + ORTE_ERROR_LOG(rc); + goto cleanup; + } + *start = *vptr; + continue; + } + if(strcmp(values[0]->keyvals[i]->key, ORTE_JOB_VPID_RANGE_KEY) == 0) { + if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&vptr, values[0]->keyvals[i]->value, ORTE_VPID))) { + ORTE_ERROR_LOG(rc); + goto cleanup; + } + *range = *vptr; + continue; + } + } + +cleanup: + for(i=0; i (rc = orte_rml.send_buffer(ORTE_RML_NAME_SEED, &cmd, ORTE_RML_TAG_RMGR_SVC, 0))) { + /* send the command */ + if(0 > (rc = orte_rml.send_buffer(ORTE_RML_NAME_SEED, &cmd, ORTE_RML_TAG_RMGR, 0))) { ORTE_ERROR_LOG(rc); OBJ_DESTRUCT(&cmd); return rc; @@ -126,233 +111,36 @@ static int orte_rmgr_proxy_create( /* wait for response */ OBJ_CONSTRUCT(&rsp, orte_buffer_t); - if(0 > (rc = orte_rml.recv_buffer(ORTE_RML_NAME_SEED, &rsp, ORTE_RML_TAG_RMGR_CLNT))) { + if(0 > (rc = orte_rml.recv_buffer(ORTE_RML_NAME_SEED, &rsp, ORTE_RML_TAG_RMGR))) { ORTE_ERROR_LOG(rc); OBJ_DESTRUCT(&rsp); return rc; } - rc = orte_rmgr_base_unpack_create_rsp(&rsp, jobid); - if(ORTE_SUCCESS != rc) { + /* get the returned command */ + count = 1; + if (ORTE_SUCCESS != (rc = orte_dss.unpack(&rsp, &command, 
&count, ORTE_RMGR_CMD))) { ORTE_ERROR_LOG(rc); OBJ_DESTRUCT(&rsp); return rc; } + /* and check it to ensure valid comm */ + if (ORTE_RMGR_SETUP_JOB_CMD != command) { + OBJ_DESTRUCT(&rsp); + ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); + return ORTE_ERR_COMM_FAILURE; + } + + /* get the jobid */ + count = 1; + if(ORTE_SUCCESS != (rc = orte_dss.unpack(&rsp, jobid, &count, ORTE_JOBID))) { + ORTE_ERROR_LOG(rc); + } + OBJ_DESTRUCT(&rsp); return rc; } -static int orte_rmgr_proxy_cmd(orte_rmgr_cmd_t cmd_id, orte_jobid_t jobid) -{ - orte_buffer_t cmd; - orte_buffer_t rsp; - int rc; - - OPAL_TRACE(2); - - /* construct command */ - OBJ_CONSTRUCT(&cmd, orte_buffer_t); - rc = orte_rmgr_base_pack_cmd(&cmd, cmd_id, jobid); - if(ORTE_SUCCESS != rc) { - ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(&cmd); - return rc; - } - - if(0 > (rc = orte_rml.send_buffer(ORTE_RML_NAME_SEED, &cmd, ORTE_RML_TAG_RMGR_SVC, 0))) { - ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(&cmd); - return rc; - } - OBJ_DESTRUCT(&cmd); - - /* wait for response */ - OBJ_CONSTRUCT(&rsp, orte_buffer_t); - if(0 > (rc = orte_rml.recv_buffer(ORTE_RML_NAME_SEED, &rsp, ORTE_RML_TAG_RMGR_CLNT))) { - ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(&rsp); - return rc; - } - - rc = orte_rmgr_base_unpack_rsp(&rsp); - if(ORTE_SUCCESS != rc) { - ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(&rsp); - return rc; - } - OBJ_DESTRUCT(&rsp); - return rc; -} - - -static int orte_rmgr_proxy_query(void) -{ - OPAL_TRACE(1); - - return orte_rmgr_proxy_cmd(ORTE_RMGR_CMD_QUERY, 0); -} - -static int orte_rmgr_proxy_allocate(orte_jobid_t jobid) -{ - OPAL_TRACE(1); - - return orte_rmgr_proxy_cmd(ORTE_RMGR_CMD_ALLOCATE, jobid); -} - -static int orte_rmgr_proxy_deallocate(orte_jobid_t jobid) -{ - OPAL_TRACE(1); - - return orte_rmgr_proxy_cmd(ORTE_RMGR_CMD_DEALLOCATE, jobid); -} - -static int orte_rmgr_proxy_map(orte_jobid_t jobid) -{ - OPAL_TRACE(1); - - return orte_rmgr_proxy_cmd(ORTE_RMGR_CMD_MAP, jobid); -} - -static int orte_rmgr_proxy_launch(orte_jobid_t jobid) -{ - OPAL_TRACE(1); - - 
return orte_rmgr_proxy_cmd(ORTE_RMGR_CMD_LAUNCH, jobid); -} - -static int orte_rmgr_proxy_terminate_job(orte_jobid_t jobid) -{ - OPAL_TRACE(1); - - return orte_rmgr_proxy_cmd(ORTE_RMGR_CMD_TERM_JOB, jobid); -} - -static int orte_rmgr_proxy_terminate_proc(const orte_process_name_t* proc_name) -{ - orte_buffer_t cmd; - orte_buffer_t rsp; - int rc; - - OPAL_TRACE(1); - - /* construct command */ - OBJ_CONSTRUCT(&cmd, orte_buffer_t); - rc = orte_rmgr_base_pack_terminate_proc_cmd(&cmd, proc_name); - if(ORTE_SUCCESS != rc) { - ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(&cmd); - return rc; - } - - if(0 > (rc = orte_rml.send_buffer(ORTE_RML_NAME_SEED, &cmd, ORTE_RML_TAG_RMGR_SVC, 0))) { - ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(&cmd); - return rc; - } - OBJ_DESTRUCT(&cmd); - - /* wait for response */ - OBJ_CONSTRUCT(&rsp, orte_buffer_t); - if(0 > (rc = orte_rml.recv_buffer(ORTE_RML_NAME_SEED, &rsp, ORTE_RML_TAG_RMGR_CLNT))) { - ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(&rsp); - return rc; - } - - rc = orte_rmgr_base_unpack_rsp(&rsp); - if(ORTE_SUCCESS != rc) { - ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(&rsp); - return rc; - } - OBJ_DESTRUCT(&rsp); - return ORTE_SUCCESS; -} - -static int orte_rmgr_proxy_signal_job(orte_jobid_t jobid, int32_t signal) -{ - orte_buffer_t cmd; - orte_buffer_t rsp; - int rc; - - OPAL_TRACE(1); - - /* construct command */ - OBJ_CONSTRUCT(&cmd, orte_buffer_t); - rc = orte_rmgr_base_pack_signal_job_cmd(&cmd, jobid, signal); - if(ORTE_SUCCESS != rc) { - ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(&cmd); - return rc; - } - - if(0 > (rc = orte_rml.send_buffer(ORTE_RML_NAME_SEED, &cmd, ORTE_RML_TAG_RMGR_SVC, 0))) { - ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(&cmd); - return rc; - } - OBJ_DESTRUCT(&cmd); - - /* wait for response */ - OBJ_CONSTRUCT(&rsp, orte_buffer_t); - if(0 > (rc = orte_rml.recv_buffer(ORTE_RML_NAME_SEED, &rsp, ORTE_RML_TAG_RMGR_CLNT))) { - ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(&rsp); - return rc; - } - - rc = orte_rmgr_base_unpack_rsp(&rsp); - if(ORTE_SUCCESS != rc) { - 
ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(&rsp); - return rc; - } - OBJ_DESTRUCT(&rsp); - return ORTE_SUCCESS; -} - -static int orte_rmgr_proxy_signal_proc(const orte_process_name_t* proc_name, int32_t signal) -{ - orte_buffer_t cmd; - orte_buffer_t rsp; - int rc; - - OPAL_TRACE(1); - - /* construct command */ - OBJ_CONSTRUCT(&cmd, orte_buffer_t); - rc = orte_rmgr_base_pack_signal_proc_cmd(&cmd, proc_name, signal); - if(ORTE_SUCCESS != rc) { - ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(&cmd); - return rc; - } - - if(0 > (rc = orte_rml.send_buffer(ORTE_RML_NAME_SEED, &cmd, ORTE_RML_TAG_RMGR_SVC, 0))) { - ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(&cmd); - return rc; - } - OBJ_DESTRUCT(&cmd); - - /* wait for response */ - OBJ_CONSTRUCT(&rsp, orte_buffer_t); - if(0 > (rc = orte_rml.recv_buffer(ORTE_RML_NAME_SEED, &rsp, ORTE_RML_TAG_RMGR_CLNT))) { - ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(&rsp); - return rc; - } - - rc = orte_rmgr_base_unpack_rsp(&rsp); - if(ORTE_SUCCESS != rc) { - ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(&rsp); - return rc; - } - OBJ_DESTRUCT(&rsp); - return ORTE_SUCCESS; -} - static void orte_rmgr_proxy_wireup_stdin(orte_jobid_t jobid) { int rc; @@ -385,7 +173,7 @@ static void orte_rmgr_proxy_callback(orte_gpr_notify_data_t *data, void *cbdata) OPAL_TRACE(1); - /* stupid ISO C forbids conversion of object pointer to function + /* ISO C forbids conversion of object pointer to function pointer. 
So we do this, which is the same thing, but without the warning from GCC */ cbfunc_union.ptr = cbdata; @@ -444,10 +232,6 @@ static void orte_rmgr_proxy_callback(orte_gpr_notify_data_t *data, void *cbdata) (*cbfunc)(jobid,ORTE_PROC_STATE_TERMINATED); continue; } - if(strcmp(keyval->key, ORTE_PROC_NUM_ABORTED) == 0) { - (*cbfunc)(jobid,ORTE_PROC_STATE_ABORTED); - continue; - } } } } @@ -481,7 +265,7 @@ static void orte_rmgr_proxy_wireup_callback(orte_gpr_notify_data_t *data, void * */ -static int orte_rmgr_proxy_spawn( +static int orte_rmgr_proxy_spawn_job( orte_app_context_t** app_context, orte_std_cntr_t num_context, orte_jobid_t* jobid, @@ -496,24 +280,24 @@ static int orte_rmgr_proxy_spawn( /* * Perform resource discovery. */ - if (ORTE_SUCCESS != (rc = orte_rmgr_proxy_query())) { + if (ORTE_SUCCESS != (rc = orte_rds.query())) { ORTE_ERROR_LOG(rc); return rc; } /* - * Initialize job segment and allocate resources + * Setup job and allocate resources */ if (ORTE_SUCCESS != - (rc = orte_rmgr_proxy_create(app_context,num_context,jobid))) { + (rc = orte_rmgr_proxy_setup_job(app_context,num_context,jobid))) { ORTE_ERROR_LOG(rc); return rc; } - if (ORTE_SUCCESS != (rc = orte_rmgr_proxy_allocate(*jobid))) { + if (ORTE_SUCCESS != (rc = orte_ras.allocate_job(*jobid))) { ORTE_ERROR_LOG(rc); return rc; } - if (ORTE_SUCCESS != (rc = orte_rmgr_proxy_map(*jobid))) { + if (ORTE_SUCCESS != (rc = orte_rmaps.map_job(*jobid, NULL))) { ORTE_ERROR_LOG(rc); return rc; } @@ -536,7 +320,7 @@ static int orte_rmgr_proxy_spawn( } /** setup the subscription so we can complete the wireup when all processes reach LAUNCHED */ - rc = orte_rmgr_base_proc_stage_gate_subscribe(*jobid, orte_rmgr_proxy_wireup_callback, NULL, ORTE_PROC_STATE_LAUNCHED); + rc = orte_smr.job_stage_gate_subscribe(*jobid, orte_rmgr_proxy_wireup_callback, NULL, ORTE_PROC_STATE_LAUNCHED); if(ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return rc; @@ -553,13 +337,13 @@ static int orte_rmgr_proxy_spawn( } cbfunc_union; void 
*cbdata; - /* stupid ISO C forbids conversion of object pointer to function + /* ISO C forbids conversion of object pointer to function pointer. So we do this, which is the same thing, but without the warning from GCC */ cbfunc_union.func = cbfunc; cbdata = cbfunc_union.ptr; - rc = orte_rmgr_base_proc_stage_gate_subscribe(*jobid, orte_rmgr_proxy_callback, cbdata, cb_conditions); + rc = orte_smr.job_stage_gate_subscribe(*jobid, orte_rmgr_proxy_callback, cbdata, cb_conditions); if(ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return rc; @@ -569,12 +353,12 @@ static int orte_rmgr_proxy_spawn( /* * launch the job */ - if (ORTE_SUCCESS != (rc = orte_rmgr_proxy_launch(*jobid))) { + if (ORTE_SUCCESS != (rc = orte_pls.launch_job(*jobid))) { ORTE_ERROR_LOG(rc); return rc; } - orte_ns.free_name(&name); + free(name); /* done with this: free the name object itself, not &name */ return ORTE_SUCCESS; } diff --git a/orte/mca/rmgr/proxy/rmgr_proxy_component.c b/orte/mca/rmgr/proxy/rmgr_proxy_component.c index 6f44d63265..23717e384f 100644 --- a/orte/mca/rmgr/proxy/rmgr_proxy_component.c +++ b/orte/mca/rmgr/proxy/rmgr_proxy_component.c @@ -44,7 +44,7 @@ orte_rmgr_proxy_component_t mca_rmgr_proxy_component = { /* Indicate that we are a iof v1.0.0 component (which also implies a specific MCA version) */ - ORTE_RMGR_BASE_VERSION_1_0_0, + ORTE_RMGR_BASE_VERSION_1_3_0, "proxy", /* MCA component name */ ORTE_MAJOR_VERSION, /* MCA component major version */ @@ -76,6 +76,14 @@ static int orte_rmgr_proxy_open(void) static orte_rmgr_base_module_t *orte_rmgr_proxy_init(int* priority) { + /* if we are an HNP, then do NOT select us */ + if (orte_process_info.seed) { + return NULL; + } + + /* set us as lowest priority so we can be overridden + * by OS-specific components + */ *priority = 1; return &orte_rmgr_proxy_module; } diff --git a/orte/mca/rmgr/rmgr.h b/orte/mca/rmgr/rmgr.h index 9c34464e13..4b944a7284 100644 --- a/orte/mca/rmgr/rmgr.h +++ b/orte/mca/rmgr/rmgr.h @@ -50,16 +50,7 @@ extern "C" { */ /** - * Query/update a resource - * - * @code - *
return_value = orte_rmgr.query(); - * @endcode - */ -typedef int (*orte_rmgr_base_module_query_fn_t)(void); - -/** - * Create a job. Allocated a jobid and initializes the job segment. + * Set up a job. Allocates a jobid and initializes the job segment. * * @param app_context Array of application context values. * @param num_context Number of entries in the app_context array. @@ -68,80 +59,14 @@ typedef int (*orte_rmgr_base_module_query_fn_t)(void); * @code * orte_jobid_t jobid; * - * return_value = orte_rmgr.create(app_context,num_context,&jobid); + * return_value = orte_rmgr.setup_job(app_context,num_context,&jobid); * @endcode */ -typedef int (*orte_rmgr_base_module_create_fn_t)( +typedef int (*orte_rmgr_base_module_setup_job_fn_t)( orte_app_context_t** app_context, orte_std_cntr_t num_context, orte_jobid_t *jobid); -/** - * Allocate resources to a job. - * - * @code - * return_value = orte_rmgr.allocate(orte_jobid_t jobid) - * @endcode - */ -typedef int (*orte_rmgr_base_module_allocate_fn_t)(orte_jobid_t jobid); - -/** - * Deallocate resources from a job - * - * @code - * return_value = orte_rmgr.deallocate(orte_jobid_t jobid); - * @endcode - */ -typedef int (*orte_rmgr_base_module_deallocate_fn_t)(orte_jobid_t jobid); - -/** - * Map processes to resources assigned to a job. - * - * @code - * return_value = orte_mgr.map(orte_jobid_t jobid); - * @endcode - */ -typedef int (*orte_rmgr_base_module_map_fn_t)(orte_jobid_t job); - -/** - * Launch processes that have been mapped. - * - * @code - * return_value = orte_rmgr.launch(orte_jobid_t jobid); - * @endcode - */ -typedef int (*orte_rmgr_base_module_launch_fn_t)(orte_jobid_t job); - -/** - * Terminate an entire job. - * - * @code - * return_value = orte_rmgr.terminate_job(orte_jobid_t jobid); - * @endcode - */ -typedef int (*orte_rmgr_base_module_terminate_job_fn_t)(orte_jobid_t job); - -/** - * Terminate a specific process.
- * - * @code - * return_value = orte_rmgr.terminate_proc(const orte_process_name_t* proc_name); - * @endcode - */ -typedef int (*orte_rmgr_base_module_terminate_proc_fn_t)(const orte_process_name_t* proc_name); - - -/** - * Transmit a signal to an entire job - */ -typedef int (*orte_rmgr_base_module_signal_job_fn_t)(orte_jobid_t job, int32_t signal); - -/** - * Transmit a signal to a specific process - */ -typedef int (*orte_rmgr_base_module_signal_proc_fn_t)(const orte_process_name_t* proc_name, int32_t signal); - - /* * Callback function for resource manager */ @@ -164,61 +89,91 @@ typedef void (*orte_rmgr_cb_fn_t)(orte_jobid_t jobid, orte_proc_state_t state); * return_value = orte_rmgr.spawn(app_context, num_context, &jobid, NULL, 0); * @endcode */ -typedef int (*orte_rmgr_base_module_spawn_fn_t)( +typedef int (*orte_rmgr_base_module_spawn_job_fn_t)( orte_app_context_t** app_context, orte_std_cntr_t num_context, orte_jobid_t *jobid, orte_rmgr_cb_fn_t cbfn, orte_proc_state_t cb_conditions); -/* - * Init the proc stage gate process - * A process goes through several stages during its life, each stage being marked by - * a barrier function that prevents the process from going any further until all - * processes reach that point. This function initializes the callbacks required - * to manage that process. - */ -typedef int (*orte_rmgr_base_module_proc_stage_gate_init_fn_t)(orte_jobid_t job); - -/* - * Call the proc stage gate manager - * As each process achieves a defined barrier (or "stage gate"), it sets its process - * status (via the SOH) to indicate "at stage gate x". When all process have reached - * that point, this function is called with a message indicating this has happened. - * The stage gate manager then takes the appropriate action for that stage gate - - * usually, broadcasting a message to all processes in the job that allows them - * to proceed. 
- */ -typedef int (*orte_rmgr_base_module_proc_stage_gate_mgr_fn_t)(orte_gpr_notify_message_t *msg); /** - * Cleanup resources held by rmgr. + * Allow module-specific init. + */ + +typedef int (*orte_rmgr_base_module_init_fn_t)(void); + +/** + * Cleanup resources held by rmgr. */ typedef int (*orte_rmgr_base_module_finalize_fn_t)(void); +/*** APP_CONTEXT FUNCTIONS ***/ /* - * Ver 1.0.0 + * Store an array of app_context objects for a given job/pset */ -struct orte_rmgr_base_module_1_0_0_t { - orte_rmgr_base_module_query_fn_t query; - orte_rmgr_base_module_create_fn_t create; - orte_rmgr_base_module_allocate_fn_t allocate; - orte_rmgr_base_module_deallocate_fn_t deallocate; - orte_rmgr_base_module_map_fn_t map; - orte_rmgr_base_module_launch_fn_t launch; - orte_rmgr_base_module_terminate_job_fn_t terminate_job; - orte_rmgr_base_module_terminate_proc_fn_t terminate_proc; - orte_rmgr_base_module_signal_job_fn_t signal_job; - orte_rmgr_base_module_signal_proc_fn_t signal_proc; - orte_rmgr_base_module_spawn_fn_t spawn; - orte_rmgr_base_module_proc_stage_gate_init_fn_t stage_gate_init; - orte_rmgr_base_module_proc_stage_gate_mgr_fn_t stage_gate_mgr; - orte_rmgr_base_module_finalize_fn_t finalize; +typedef int (*orte_rmgr_base_module_store_app_context_fn_t)(orte_jobid_t jobid, + orte_app_context_t** app_context, + orte_std_cntr_t num_context); + +/* + * Get an array of app_context objects for a given job/pset + */ +typedef int (*orte_rmgr_base_module_get_app_context_fn_t)(orte_jobid_t jobid, + orte_app_context_t ***app_context, + orte_std_cntr_t *num_context); + +/* + * Check the app_context for changing to a working dir or the HOME dir + */ +typedef int (*orte_rmgr_base_module_check_context_cwd_fn_t)(orte_app_context_t *context, + bool want_chdir); + +/* + * Check app_context application for existence + */ +typedef int (*orte_rmgr_base_module_check_context_app_fn_t)(orte_app_context_t *context); + +/** + * VPID FUNCTIONS + */ + +/** + * Store the vpid range of a job + */ 
+typedef int (*orte_rmgr_base_module_set_vpid_range_fn_t)(orte_jobid_t jobid, + orte_vpid_t start, + orte_vpid_t range); + + +/** + * Retrieve the vpid range of a job + */ +typedef int (*orte_rmgr_base_module_get_vpid_range_fn_t)(orte_jobid_t jobid, + orte_vpid_t *start, + orte_vpid_t *range); + + +/* + * Ver 1.3.0 + */ +struct orte_rmgr_base_module_1_3_0_t { + orte_rmgr_base_module_init_fn_t module_init; + orte_rmgr_base_module_setup_job_fn_t setup_job; + orte_rmgr_base_module_spawn_job_fn_t spawn_job; + orte_rmgr_base_module_finalize_fn_t finalize; + /** SUPPORT FUNCTIONS ***/ + orte_rmgr_base_module_get_app_context_fn_t get_app_context; + orte_rmgr_base_module_store_app_context_fn_t store_app_context; + orte_rmgr_base_module_check_context_cwd_fn_t check_context_cwd; + orte_rmgr_base_module_check_context_app_fn_t check_context_app; + orte_rmgr_base_module_set_vpid_range_fn_t set_vpid_range; + orte_rmgr_base_module_get_vpid_range_fn_t get_vpid_range; }; -typedef struct orte_rmgr_base_module_1_0_0_t orte_rmgr_base_module_1_0_0_t; -typedef orte_rmgr_base_module_1_0_0_t orte_rmgr_base_module_t; +typedef struct orte_rmgr_base_module_1_3_0_t orte_rmgr_base_module_1_3_0_t; +typedef orte_rmgr_base_module_1_3_0_t orte_rmgr_base_module_t; /* * RMGR Component @@ -232,24 +187,24 @@ typedef orte_rmgr_base_module_t* (*orte_rmgr_base_component_init_fn_t)( * the standard component data structure */ -struct orte_rmgr_base_component_1_0_0_t { +struct orte_rmgr_base_component_1_3_0_t { mca_base_component_t rmgr_version; mca_base_component_data_1_0_0_t rmgr_data; orte_rmgr_base_component_init_fn_t rmgr_init; }; -typedef struct orte_rmgr_base_component_1_0_0_t orte_rmgr_base_component_1_0_0_t; -typedef orte_rmgr_base_component_1_0_0_t orte_rmgr_base_component_t; +typedef struct orte_rmgr_base_component_1_3_0_t orte_rmgr_base_component_1_3_0_t; +typedef orte_rmgr_base_component_1_3_0_t orte_rmgr_base_component_t; /** * Macro for use in components that are of type rmgr v1.0.0 */ 
-#define ORTE_RMGR_BASE_VERSION_1_0_0 \ +#define ORTE_RMGR_BASE_VERSION_1_3_0 \ /* rmgr v1.0 is chained to MCA v1.0 */ \ MCA_BASE_VERSION_1_0_0, \ - /* rmgr v1.0 */ \ - "rmgr", 1, 0, 0 + /* rmgr v1.3 */ \ + "rmgr", 1, 3, 0 /** * Global structure for accessing RAS functions diff --git a/orte/mca/rmgr/rmgr_types.h b/orte/mca/rmgr/rmgr_types.h index aca8326819..c66eef789a 100644 --- a/orte/mca/rmgr/rmgr_types.h +++ b/orte/mca/rmgr/rmgr_types.h @@ -19,6 +19,10 @@ #ifndef ORTE_RMGR_TYPES_H #define ORTE_RMGR_TYPES_H +#include "orte_config.h" + +#include "opal/class/opal_object.h" + #if defined(c_plusplus) || defined(__cplusplus) extern "C" { #endif @@ -31,20 +35,11 @@ extern "C" { /* * Constants for command values */ -#define ORTE_RMGR_CMD_QUERY 1 -#define ORTE_RMGR_CMD_CREATE 2 -#define ORTE_RMGR_CMD_ALLOCATE 3 -#define ORTE_RMGR_CMD_DEALLOCATE 4 -#define ORTE_RMGR_CMD_MAP 5 -#define ORTE_RMGR_CMD_LAUNCH 6 -#define ORTE_RMGR_CMD_TERM_JOB 7 -#define ORTE_RMGR_CMD_TERM_PROC 8 -#define ORTE_RMGR_CMD_SPAWN 9 -#define ORTE_RMGR_CMD_SIGNAL_JOB 10 -#define ORTE_RMGR_CMD_SIGNAL_PROC 11 +#define ORTE_RMGR_SETUP_JOB_CMD 1 +#define ORTE_RMGR_SPAWN_JOB_CMD 2 -#define ORTE_RMGR_CMD ORTE_UINT32 -typedef uint32_t orte_rmgr_cmd_t; +#define ORTE_RMGR_CMD ORTE_UINT8 +typedef uint8_t orte_rmgr_cmd_t; /* RESOURCE MANAGER DATA TYPES */ diff --git a/orte/mca/rmgr/urm/rmgr_urm.c b/orte/mca/rmgr/urm/rmgr_urm.c index 9b0c5df6cc..76537ac472 100644 --- a/orte/mca/rmgr/urm/rmgr_urm.c +++ b/orte/mca/rmgr/urm/rmgr_urm.c @@ -16,6 +16,8 @@ * $HEADER$ */ #include "orte_config.h" +#include "orte/orte_constants.h" + #ifdef HAVE_SYS_TIME_H #include #endif /* HAVE_SYS_TIME_H */ @@ -29,105 +31,73 @@ #include "opal/util/trace.h" -#include "orte/orte_constants.h" #include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/rds/base/base.h" -#include "orte/mca/ras/base/base.h" -#include "orte/mca/rmaps/base/base.h" -#include "orte/mca/rmgr/base/base.h" -#include "orte/mca/pls/base/base.h" +#include 
"orte/mca/rds/rds.h" +#include "orte/mca/ras/ras.h" +#include "orte/mca/rmaps/rmaps.h" +#include "orte/mca/pls/pls.h" #include "orte/mca/gpr/gpr.h" #include "orte/mca/iof/iof.h" #include "orte/mca/ns/ns.h" -#include "orte/mca/rml/rml.h" #include "orte/mca/smr/smr.h" +#include "orte/mca/rmgr/base/rmgr_private.h" #include "orte/mca/rmgr/urm/rmgr_urm.h" -static int orte_rmgr_urm_query(void); - -static int orte_rmgr_urm_create( +static int orte_rmgr_urm_setup_job( orte_app_context_t** app_context, orte_std_cntr_t num_context, orte_jobid_t* jobid); -static int orte_rmgr_urm_allocate( - orte_jobid_t jobid); - -static int orte_rmgr_urm_deallocate( - orte_jobid_t jobid); - -static int orte_rmgr_urm_map( - orte_jobid_t jobid); - -static int orte_rmgr_urm_launch( - orte_jobid_t jobid); - -static int orte_rmgr_urm_terminate_job( - orte_jobid_t jobid); - -static int orte_rmgr_urm_terminate_proc( - const orte_process_name_t* proc_name); - -static int orte_rmgr_urm_signal_job( - orte_jobid_t jobid, int32_t signal); - -static int orte_rmgr_urm_signal_proc( - const orte_process_name_t* proc_name, - int32_t signal); - -static int orte_rmgr_urm_spawn( +static int orte_rmgr_urm_spawn_job( orte_app_context_t** app_context, orte_std_cntr_t num_context, orte_jobid_t* jobid, orte_rmgr_cb_fn_t cbfn, orte_proc_state_t cb_conditions); -static int orte_rmgr_urm_finalize(void); +static int orte_rmgr_urm_module_init(void); + +static int orte_rmgr_urm_module_finalize(void); orte_rmgr_base_module_t orte_rmgr_urm_module = { - orte_rmgr_urm_query, - orte_rmgr_urm_create, - orte_rmgr_urm_allocate, - orte_rmgr_urm_deallocate, - orte_rmgr_urm_map, - orte_rmgr_urm_launch, - orte_rmgr_urm_terminate_job, - orte_rmgr_urm_terminate_proc, - orte_rmgr_urm_signal_job, - orte_rmgr_urm_signal_proc, - orte_rmgr_urm_spawn, - orte_rmgr_base_proc_stage_gate_init, - orte_rmgr_base_proc_stage_gate_mgr, - orte_rmgr_urm_finalize + orte_rmgr_urm_module_init, + orte_rmgr_urm_setup_job, + orte_rmgr_urm_spawn_job, + 
orte_rmgr_urm_module_finalize, + /** SUPPORT FUNCTIONS ***/ + orte_rmgr_base_get_app_context, + orte_rmgr_base_put_app_context, + orte_rmgr_base_check_context_cwd, + orte_rmgr_base_check_context_app, + orte_rmgr_base_set_vpid_range, + orte_rmgr_base_get_vpid_range }; -/* - * Resource discovery - */ -static int orte_rmgr_urm_query(void) +/* + * Since we were selected, complete the init + * by starting the comm system + */ +static int orte_rmgr_urm_module_init(void) { int rc; - - OPAL_TRACE(1); - - if(ORTE_SUCCESS != (rc = orte_rds_base_query())) { + + if (ORTE_SUCCESS != (rc = orte_rmgr_base_comm_start())) { ORTE_ERROR_LOG(rc); - return rc; } - return ORTE_SUCCESS; + return rc; } /* - * Create the job segment and initialize the application context. + * Setup the job */ -static int orte_rmgr_urm_create( +static int orte_rmgr_urm_setup_job( orte_app_context_t** app_context, orte_std_cntr_t num_context, orte_jobid_t* jobid) @@ -150,127 +120,21 @@ static int orte_rmgr_urm_create( return rc; } + /* set a default job state of INIT. We need this so that + * orterun doesn't report an error from the registry if + * the spawn fails for some reason. 
Otherwise, orterun + * will try to get the job state (to see why we exited) + * and will find nothing + */ + if (ORTE_SUCCESS != (rc = orte_smr.set_job_state(*jobid, ORTE_JOB_STATE_INIT))) { + ORTE_ERROR_LOG(rc); + return rc; + } + return ORTE_SUCCESS; } -static int orte_rmgr_urm_allocate(orte_jobid_t jobid) -{ - OPAL_TRACE(1); - - return orte_ras_base_allocate(jobid, &mca_rmgr_urm_component.urm_ras); -} - -static int orte_rmgr_urm_deallocate(orte_jobid_t jobid) -{ - OPAL_TRACE(1); - - return mca_rmgr_urm_component.urm_ras->deallocate(jobid); -} - -static int orte_rmgr_urm_map(orte_jobid_t jobid) -{ - OPAL_TRACE(1); - - return mca_rmgr_urm_component.urm_rmaps->map(jobid); -} - -static int orte_rmgr_urm_launch(orte_jobid_t jobid) -{ - int ret, ret2; - - OPAL_TRACE(1); - - if (ORTE_SUCCESS != - (ret = mca_rmgr_urm_component.urm_pls->launch(jobid))) { - ORTE_ERROR_LOG(ret); - ret2 = orte_smr.set_job_state(jobid, ORTE_JOB_STATE_ABORTED); - if (ORTE_SUCCESS != ret2) { - ORTE_ERROR_LOG(ret2); - return ret2; - } - } - - return ret; -} - -static int orte_rmgr_urm_terminate_job(orte_jobid_t jobid) -{ - int ret; - orte_jobid_t my_jobid; - - OPAL_TRACE(1); - - ret = orte_ns.get_jobid(&my_jobid, orte_process_info.my_name); - if (ORTE_SUCCESS == ret) { - /* if our jobid is the one we're trying to kill AND we're a - singleton, then calling the urm_pls isn't going to be able - to do anything. Just call exit. */ - if (orte_process_info.singleton && jobid == my_jobid) { - exit(1); - } - } - - return mca_rmgr_urm_component.urm_pls->terminate_job(jobid); -} - -static int orte_rmgr_urm_terminate_proc(const orte_process_name_t* proc_name) -{ - OPAL_TRACE(1); - - if ((0 == orte_ns.compare(ORTE_NS_CMP_ALL, proc_name, - orte_process_info.my_name)) && - (orte_process_info.singleton)) { - /* if we're trying to get ourselves killed and we're a - singleton, calling terminate_proc isn't going to work - properly -- there's no pls setup properly for us. Just - call exit and be done. 
*/ - exit(1); - } - - return mca_rmgr_urm_component.urm_pls->terminate_proc(proc_name); -} - - -static int orte_rmgr_urm_signal_job(orte_jobid_t jobid, int32_t signal) -{ - int ret; - orte_jobid_t my_jobid; - - OPAL_TRACE(1); - - ret = orte_ns.get_jobid(&my_jobid, orte_process_info.my_name); - if (ORTE_SUCCESS == ret) { - /** if our jobid is the one we're trying to signal AND we're a - * singleton, then calling the urm_pls isn't going to be able - * to do anything - we already have the signal! */ - if (orte_process_info.singleton && jobid == my_jobid) { - return ORTE_SUCCESS; - } - } - - return mca_rmgr_urm_component.urm_pls->signal_job(jobid, signal); -} - -static int orte_rmgr_urm_signal_proc(const orte_process_name_t* proc_name, int32_t signal) -{ - OPAL_TRACE(1); - - if ((0 == orte_ns.compare(ORTE_NS_CMP_ALL, proc_name, - orte_process_info.my_name)) && - (orte_process_info.singleton)) { - /** if we're trying to signal ourselves and we're a - * singleton, calling signal_proc isn't going to work - * properly -- there's no pls setup properly for us. Besides, we - * already have the signal! - */ - return ORTE_SUCCESS; - } - - return mca_rmgr_urm_component.urm_pls->signal_proc(proc_name, signal); -} - - static void orte_rmgr_urm_wireup_stdin(orte_jobid_t jobid) { int rc; @@ -303,7 +167,7 @@ static void orte_rmgr_urm_callback(orte_gpr_notify_data_t *data, void *cbdata) OPAL_TRACE(1); - /* stupid ISO C forbids conversion of object pointer to function + /* ISO C forbids conversion of object pointer to function pointer. 
So we do this, which is the same thing, but without the warning from GCC */ cbfunc_union.ptr = cbdata; @@ -362,10 +226,6 @@ static void orte_rmgr_urm_callback(orte_gpr_notify_data_t *data, void *cbdata) (*cbfunc)(jobid,ORTE_PROC_STATE_TERMINATED); continue; } - if(strcmp(keyval->key, ORTE_PROC_NUM_ABORTED) == 0) { - (*cbfunc)(jobid,ORTE_PROC_STATE_ABORTED); - continue; - } } } } @@ -400,7 +260,7 @@ static void orte_rmgr_urm_wireup_callback(orte_gpr_notify_data_t *data, void *cb */ -static int orte_rmgr_urm_spawn( +static int orte_rmgr_urm_spawn_job( orte_app_context_t** app_context, orte_std_cntr_t num_context, orte_jobid_t* jobid, @@ -415,29 +275,26 @@ static int orte_rmgr_urm_spawn( /* * Perform resource discovery. */ - if (mca_rmgr_urm_component.urm_rds == false && - ORTE_SUCCESS != (rc = orte_rds_base_query())) { + if (ORTE_SUCCESS != (rc = orte_rds.query())) { ORTE_ERROR_LOG(rc); return rc; - } else { - mca_rmgr_urm_component.urm_rds = true; } /* * Initialize job segment and allocate resources */ /* JJH Insert C/N mapping stuff here */ if (ORTE_SUCCESS != - (rc = orte_rmgr_urm_create(app_context,num_context,jobid))) { + (rc = orte_rmgr_urm_setup_job(app_context,num_context,jobid))) { ORTE_ERROR_LOG(rc); return rc; } - if (ORTE_SUCCESS != (rc = orte_rmgr_urm_allocate(*jobid))) { + if (ORTE_SUCCESS != (rc = orte_ras.allocate_job(*jobid))) { ORTE_ERROR_LOG(rc); return rc; } - if (ORTE_SUCCESS != (rc = orte_rmgr_urm_map(*jobid))) { + if (ORTE_SUCCESS != (rc = orte_rmaps.map_job(*jobid, NULL))) { ORTE_ERROR_LOG(rc); return rc; } @@ -458,7 +315,8 @@ static int orte_rmgr_urm_spawn( ORTE_ERROR_LOG(rc); return rc; } - + free(name); /* done with this */ + /* setup the launch system's stage gate counters and subscriptions */ if (ORTE_SUCCESS != (rc = orte_rmgr_base_proc_stage_gate_init(*jobid))) { ORTE_ERROR_LOG(rc); @@ -466,14 +324,22 @@ static int orte_rmgr_urm_spawn( } /** setup the subscription so we can complete the wireup when all processes reach LAUNCHED */ - rc = 
orte_rmgr_base_proc_stage_gate_subscribe(*jobid, orte_rmgr_urm_wireup_callback, NULL, ORTE_PROC_STATE_LAUNCHED); + rc = orte_smr.job_stage_gate_subscribe(*jobid, orte_rmgr_urm_wireup_callback, NULL, ORTE_PROC_STATE_LAUNCHED); if(ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return rc; } + /* + * Define the ERRMGR's callbacks as required + */ + if (ORTE_SUCCESS != (rc = orte_errmgr.register_job(*jobid))) { + ORTE_ERROR_LOG(rc); + return rc; + } + /* - * setup callback + * setup caller's callback */ if(NULL != cbfunc) { @@ -483,13 +349,13 @@ static int orte_rmgr_urm_spawn( } cbfunc_union; void *cbdata; - /* stupid ISO C forbids conversion of object pointer to function + /* ISO C forbids conversion of object pointer to function pointer. So we do this, which is the same thing, but without the warning from GCC */ cbfunc_union.func = cbfunc; cbdata = cbfunc_union.ptr; - rc = orte_rmgr_base_proc_stage_gate_subscribe(*jobid, orte_rmgr_urm_callback, cbdata, cb_conditions); + rc = orte_smr.job_stage_gate_subscribe(*jobid, orte_rmgr_urm_callback, cbdata, cb_conditions); if(ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return rc; @@ -499,58 +365,24 @@ static int orte_rmgr_urm_spawn( /* * launch the job */ - if (ORTE_SUCCESS != (rc = orte_rmgr_urm_launch(*jobid))) { + if (ORTE_SUCCESS != (rc = orte_pls.launch_job(*jobid))) { ORTE_ERROR_LOG(rc); return rc; } - orte_ns.free_name(&name); return ORTE_SUCCESS; } -static int orte_rmgr_urm_finalize(void) +static int orte_rmgr_urm_module_finalize(void) { int rc; - OPAL_TRACE(1); - - /** - * Finalize Process Launch Subsystem (PLS) - */ - if (ORTE_SUCCESS != (rc = orte_pls_base_finalize())) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /** - * Finalize Resource Mapping Subsystem (RMAPS) - */ - if (ORTE_SUCCESS != (rc = orte_rmaps_base_finalize())) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /** - * Finalize Resource Allocation Subsystem (RAS) - */ - if (ORTE_SUCCESS != (rc = orte_ras_base_finalize())) { - ORTE_ERROR_LOG(rc); - return rc; - } - - 
/** - * Finalize Resource Discovery Subsystem (RDS) - */ - if (ORTE_SUCCESS != (rc = orte_rds_base_finalize())) { - ORTE_ERROR_LOG(rc); - return rc; - } - /* Cancel pending receive. */ + if (ORTE_SUCCESS != (rc = orte_rmgr_base_comm_stop())) { + ORTE_ERROR_LOG(rc); + } - orte_rml.recv_cancel(ORTE_RML_NAME_ANY, ORTE_RML_TAG_RMGR_SVC); - - return ORTE_SUCCESS; + return rc; } diff --git a/orte/mca/rmgr/urm/rmgr_urm.h b/orte/mca/rmgr/urm/rmgr_urm.h index d97ac2c3e0..cd966e08f4 100644 --- a/orte/mca/rmgr/urm/rmgr_urm.h +++ b/orte/mca/rmgr/urm/rmgr_urm.h @@ -38,12 +38,6 @@ struct orte_rmgr_urm_component_t { orte_rmgr_base_component_t super; /** Has RDS query been called */ bool urm_rds; - /** Selected ras module */ - orte_ras_base_module_t *urm_ras; - /** Selected rmaps module */ - orte_rmaps_base_module_t *urm_rmaps; - /** Selected pls module */ - orte_pls_base_module_t *urm_pls; }; /** Convenience typedef */ typedef struct orte_rmgr_urm_component_t orte_rmgr_urm_component_t; diff --git a/orte/mca/rmgr/urm/rmgr_urm_component.c b/orte/mca/rmgr/urm/rmgr_urm_component.c index 532d8cd6b0..adbb83e8ec 100644 --- a/orte/mca/rmgr/urm/rmgr_urm_component.c +++ b/orte/mca/rmgr/urm/rmgr_urm_component.c @@ -18,18 +18,20 @@ #include "orte_config.h" #include "orte/orte_constants.h" -#include "orte/util/proc_info.h" + #include "opal/util/output.h" +#include "opal/mca/base/mca_base_param.h" + #include "orte/dss/dss_types.h" #include "orte/mca/errmgr/errmgr.h" - -#include "orte/mca/rds/base/base.h" -#include "opal/mca/base/mca_base_param.h" -#include "orte/mca/ras/base/base.h" -#include "orte/mca/rmaps/base/base.h" -#include "orte/mca/pls/base/base.h" -#include "orte/mca/rmgr/base/base.h" +#include "orte/util/proc_info.h" +#include "orte/mca/rds/rds.h" +#include "orte/mca/ras/ras.h" +#include "orte/mca/rmaps/rmaps.h" +#include "orte/mca/pls/pls.h" #include "orte/mca/rml/rml.h" + +#include "orte/mca/rmgr/rmgr.h" #include "rmgr_urm.h" /* @@ -47,10 +49,10 @@ orte_rmgr_urm_component_t 
mca_rmgr_urm_component = { information about the component itself */ { - /* Indicate that we are a iof v1.0.0 component (which also + /* Indicate that we are a rmgr v1.3.0 component (which also implies a specific MCA version) */ - ORTE_RMGR_BASE_VERSION_1_0_0, + ORTE_RMGR_BASE_VERSION_1_3_0, "urm", /* MCA component name */ ORTE_MAJOR_VERSION, /* MCA component major version */ @@ -76,133 +78,17 @@ orte_rmgr_urm_component_t mca_rmgr_urm_component = { */ static int orte_rmgr_urm_open(void) { - int rc; - - /** - * Open Resource Discovery Subsystem (RDS) - */ - if (ORTE_SUCCESS != (rc = orte_rds_base_open())) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /** - * Open Resource Allocation Subsystem (RAS) - */ - if (ORTE_SUCCESS != (rc = orte_ras_base_open())) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /** - * Open Resource Mapping Subsystem (RMAPS) - */ - if (ORTE_SUCCESS != (rc = orte_rmaps_base_open())) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /** - * Open Process Launch Subsystem (PLS) - */ - if (ORTE_SUCCESS != (rc = orte_pls_base_open())) { - ORTE_ERROR_LOG(rc); - return rc; - } - return ORTE_SUCCESS; } - -static void orte_rmgr_urm_recv( - int status, - orte_process_name_t* peer, - orte_buffer_t* req, - orte_rml_tag_t tag, - void* cbdata) -{ - int rc; - orte_buffer_t rsp; - OBJ_CONSTRUCT(&rsp, orte_buffer_t); - - if (ORTE_SUCCESS != (rc = orte_rmgr_base_cmd_dispatch(req,&rsp))) { - ORTE_ERROR_LOG(rc); - goto cleanup; - } - - rc = orte_rml.send_buffer(peer, &rsp, ORTE_RML_TAG_RMGR_CLNT, 0); - if (rc < 0) { - ORTE_ERROR_LOG(rc); - goto cleanup; - } - -cleanup: - OBJ_DESTRUCT(&rsp); -} - - static orte_rmgr_base_module_t *orte_rmgr_urm_init(int* priority) { - int rc; - char* pls = NULL; - if(orte_process_info.seed == false) { - /* if we are bootproxy - need to be selected */ - int id = mca_base_param_register_int("rmgr","bootproxy","jobid",NULL,0); - int jobid = 0; - mca_base_param_lookup_int(id,&jobid); - if(jobid == 0) { - return NULL; - } - /* use fork pls for 
bootproxy */ - id = mca_base_param_register_string("rmgr","bootproxy","pls",NULL,"fork"); - mca_base_param_lookup_string(id,&pls); - } - - /** - * Select RDS components. - */ - if (ORTE_SUCCESS != (rc = orte_rds_base_select())) { - ORTE_ERROR_LOG(rc); - return NULL; - } - mca_rmgr_urm_component.urm_rds = false; - - /** - * Find available RAS components - */ - if (ORTE_SUCCESS != (rc = orte_ras_base_find_available())) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - return NULL; - } - - /** - * Select RMAPS component - */ - if (NULL == (mca_rmgr_urm_component.urm_rmaps = orte_rmaps_base_select(NULL))) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - return NULL; - } - - /** - * Select PLS component - */ - if (NULL == (mca_rmgr_urm_component.urm_pls = orte_pls_base_select(pls))) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - return NULL; - } - - /* Post non-blocking receive */ - - if (0 > (rc = orte_rml.recv_buffer_nb( - ORTE_RML_NAME_ANY, - ORTE_RML_TAG_RMGR_SVC, - ORTE_RML_PERSISTENT, - orte_rmgr_urm_recv, - NULL))) { - ORTE_ERROR_LOG(rc); + /* if we are NOT an HNP, then we do NOT want to be selected */ + if(!orte_process_info.seed) { return NULL; } + /* volunteer to be selected */ *priority = 100; return &orte_rmgr_urm_module; } @@ -213,39 +99,5 @@ static orte_rmgr_base_module_t *orte_rmgr_urm_init(int* priority) */ static int orte_rmgr_urm_close(void) { - int rc; - - /** - * Close Process Launch Subsystem (PLS) - */ - if (ORTE_SUCCESS != (rc = orte_pls_base_close())) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /** - * Close Resource Mapping Subsystem (RMAPS) - */ - if (ORTE_SUCCESS != (rc = orte_rmaps_base_close())) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /** - * Close Resource Allocation Subsystem (RAS) - */ - if (ORTE_SUCCESS != (rc = orte_ras_base_close())) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /** - * Close Resource Discovery Subsystem (RDS) - */ - if (ORTE_SUCCESS != (rc = orte_rds_base_close())) { - ORTE_ERROR_LOG(rc); - return rc; - } - return ORTE_SUCCESS; } 
diff --git a/orte/mca/rml/rml_types.h b/orte/mca/rml/rml_types.h index c926cad7bb..d1a5ff1d3e 100644 --- a/orte/mca/rml/rml_types.h +++ b/orte/mca/rml/rml_types.h @@ -35,16 +35,23 @@ typedef uint32_t orte_rml_tag_t; #define ORTE_RML_TAG ORTE_UINT32 -#define ORTE_RML_TAG_NS 1 -#define ORTE_RML_TAG_GPR 2 -#define ORTE_RML_TAG_GPR_NOTIFY 3 -#define ORTE_RML_TAG_DAEMON 4 -#define ORTE_RML_TAG_IOF_SVC 5 -#define ORTE_RML_TAG_IOF_CLNT 6 -#define ORTE_RML_TAG_XCAST 7 -#define ORTE_RML_TAG_RMGR_SVC 8 -#define ORTE_RML_TAG_RMGR_CLNT 9 -#define ORTE_RML_TAG_PROBE 10 +#define ORTE_RML_TAG_NS 1 +#define ORTE_RML_TAG_GPR 2 +#define ORTE_RML_TAG_GPR_NOTIFY 3 +#define ORTE_RML_TAG_DAEMON 4 +#define ORTE_RML_TAG_IOF_SVC 5 +#define ORTE_RML_TAG_IOF_CLNT 6 +#define ORTE_RML_TAG_XCAST 7 +#define ORTE_RML_TAG_RMGR 8 +#define ORTE_RML_TAG_PROBE 9 +#define ORTE_RML_TAG_RDS 10 +#define ORTE_RML_TAG_RAS 11 +#define ORTE_RML_TAG_RMAPS 12 +#define ORTE_RML_TAG_PLS 13 +#define ORTE_RML_TAG_PLS_ORTED 14 +#define ORTE_RML_TAG_ERRMGR 15 +#define ORTE_RML_TAG_BPROC 16 +#define ORTE_RML_TAG_BPROC_ABORT 17 #define ORTE_RML_TAG_DYNAMIC 2000 #define ORTE_RML_TAG_MAX UINT32_MAX diff --git a/orte/mca/schema/schema_types.h b/orte/mca/schema/schema_types.h index b47d71277a..f6698983b5 100644 --- a/orte/mca/schema/schema_types.h +++ b/orte/mca/schema/schema_types.h @@ -77,7 +77,8 @@ #define ORTE_PROC_EXIT_CODE_KEY "orte-proc-exit-code" #define ORTE_PROC_NUM_ALIVE "orte-proc-num-alive" #define ORTE_PROC_NUM_ABORTED "orte-proc-num-aborted" -#define ORTE_PROC_NUM_AT_INIT "orte-proc-num-init" +#define ORTE_PROC_NUM_FAILED_START "orte-proc-num-failed-start" +#define ORTE_PROC_NUM_AT_INIT "orte-proc-num-init" #define ORTE_PROC_NUM_LAUNCHED "orte-proc-num-launched" #define ORTE_PROC_NUM_RUNNING "orte-proc-num-running" #define ORTE_PROC_NUM_AT_STG1 "orte-proc-num-stg1" @@ -86,6 +87,7 @@ #define ORTE_PROC_NUM_FINALIZED "orte-proc-num-finalized" #define ORTE_PROC_NUM_TERMINATED "orte-proc-num-terminated" #define 
ORTE_PROC_RML_IP_ADDRESS_KEY "orte-proc-rml-ip-addr" + /* * ORTE-wide names for specific system triggers and subscriptions */ @@ -98,5 +100,14 @@ #define ORTE_NUM_FINALIZED_TRIGGER "orte-num-finalized" #define ORTE_NUM_ABORTED_TRIGGER "orte-num-aborted" #define ORTE_NUM_TERMINATED_TRIGGER "orte-num-terminated" +#define ORTE_FAILED_TO_START_TRIGGER "orte-failed-start-trig" + +/* + * ORTED (ORTE DAEMON) TRIGGER DEFINITIONS + */ +#define ORTED_LAUNCH_STAGE_GATE_TRIGGER "orted-launch-gate" +#define ORTED_LAUNCH_STG_SUB "orted-launch-sub" +#define ORTED_LAUNCH_STAGE_GATE_CNTR "orted-num-at-launch-gate" +#define ORTED_NUM_TO_BE_LAUNCHED "orted-num-to-be-launched" #endif diff --git a/orte/mca/smr/Makefile.am b/orte/mca/smr/Makefile.am index ab3b5668ea..c547b951fe 100644 --- a/orte/mca/smr/Makefile.am +++ b/orte/mca/smr/Makefile.am @@ -27,6 +27,8 @@ nobase_orte_HEADERS = headers = smr.h smr_types.h libmca_smr_la_SOURCES += $(headers) +include base/Makefile.am + # Conditionally install the header files if WANT_INSTALL_HEADERS nobase_orte_HEADERS += $(headers) @@ -35,7 +37,5 @@ else ortedir = $(includedir) endif -include base/Makefile.am - distclean-local: rm -f base/static-components.h diff --git a/orte/mca/smr/base/Makefile.am b/orte/mca/smr/base/Makefile.am index 5340573980..2044ea63f5 100644 --- a/orte/mca/smr/base/Makefile.am +++ b/orte/mca/smr/base/Makefile.am @@ -28,6 +28,7 @@ libmca_smr_la_SOURCES += \ base/smr_base_set_proc_state.c \ base/smr_base_get_job_state.c \ base/smr_base_set_job_state.c \ + base/smr_base_trig_init_fns.c \ base/smr_base_open.c \ base/data_type_support/smr_data_type_compare_fns.c \ base/data_type_support/smr_data_type_copy_fns.c \ diff --git a/orte/mca/smr/base/smr_base_local_functions.c b/orte/mca/smr/base/smr_base_local_functions.c index af75243241..805a555115 100644 --- a/orte/mca/smr/base/smr_base_local_functions.c +++ b/orte/mca/smr/base/smr_base_local_functions.c @@ -30,26 +30,32 @@ #include "orte/mca/smr/base/smr_private.h" +/* these 
functions will default to SUCCESS so that environments + * that do not provide any support will not fail. This is + * particularly important for the "begin_monitoring" function + * as some systems can call this without that support + */ + int orte_smr_base_get_node_state_not_available(orte_node_state_t *state, orte_cellid_t cell, char *nodename) { - return ORTE_ERR_NOT_IMPLEMENTED; + return ORTE_SUCCESS; } int orte_smr_base_set_node_state_not_available(orte_cellid_t cell, char *nodename, orte_node_state_t state) { - return ORTE_ERR_NOT_IMPLEMENTED; + return ORTE_SUCCESS; } int orte_smr_base_begin_monitoring_not_available(orte_jobid_t job) { - return ORTE_ERR_NOT_IMPLEMENTED; + return ORTE_SUCCESS; } int orte_smr_base_module_finalize_not_available (void) { - return ORTE_ERR_NOT_IMPLEMENTED; + return ORTE_SUCCESS; } diff --git a/orte/mca/smr/base/smr_base_open.c b/orte/mca/smr/base/smr_base_open.c index 396afff1f2..1f8d2b2982 100644 --- a/orte/mca/smr/base/smr_base_open.c +++ b/orte/mca/smr/base/smr_base_open.c @@ -62,6 +62,10 @@ orte_smr_base_module_t orte_smr = { orte_smr_base_get_job_state, orte_smr_base_set_job_state, orte_smr_base_begin_monitoring_not_available, + orte_smr_base_init_job_stage_gates, + orte_smr_base_init_orted_stage_gates, + orte_smr_base_define_alert_monitor, + orte_smr_base_job_stage_gate_subscribe, orte_smr_base_module_finalize_not_available }; diff --git a/orte/mca/smr/base/smr_base_set_proc_state.c b/orte/mca/smr/base/smr_base_set_proc_state.c index 1c5d34528b..d7b8c98181 100644 --- a/orte/mca/smr/base/smr_base_set_proc_state.c +++ b/orte/mca/smr/base/smr_base_set_proc_state.c @@ -98,7 +98,8 @@ int orte_smr_base_set_proc_state(orte_process_name_t *proc, OBJ_RELEASE(value); /* check to see if we need to increment orte-standard counters */ - if (ORTE_PROC_STATE_AT_STG1 == state || + if (ORTE_PROC_STATE_LAUNCHED == state || + ORTE_PROC_STATE_AT_STG1 == state || ORTE_PROC_STATE_AT_STG2 == state || ORTE_PROC_STATE_AT_STG3 == state || 
ORTE_PROC_STATE_FINALIZED == state || @@ -128,6 +129,13 @@ int orte_smr_base_set_proc_state(orte_process_name_t *proc, /* see which state we are in - let that determine the counter */ switch (state) { + case ORTE_PROC_STATE_LAUNCHED: + if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[0]), ORTE_PROC_NUM_LAUNCHED, ORTE_UNDEF, NULL))) { + ORTE_ERROR_LOG(rc); + goto cleanup; + } + break; + case ORTE_PROC_STATE_AT_STG1: if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[0]), ORTE_PROC_NUM_AT_STG1, ORTE_UNDEF, NULL))) { ORTE_ERROR_LOG(rc); diff --git a/orte/mca/smr/base/smr_base_trig_init_fns.c b/orte/mca/smr/base/smr_base_trig_init_fns.c new file mode 100644 index 0000000000..58c9a15181 --- /dev/null +++ b/orte/mca/smr/base/smr_base_trig_init_fns.c @@ -0,0 +1,448 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2005 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +/** @file: + */ + +/* + * includes + */ +#include "orte_config.h" + +#include + +#include "orte/orte_constants.h" +#include "orte/orte_types.h" + +#include "opal/util/output.h" + +#include "orte/dss/dss.h" +#include "orte/mca/gpr/gpr.h" +#include "orte/mca/ns/ns.h" +#include "orte/mca/errmgr/errmgr.h" + +#include "orte/mca/smr/base/smr_private.h" + + +int orte_smr_base_init_job_stage_gates(orte_jobid_t job, + orte_gpr_trigger_cb_fn_t cbfunc, + void *user_tag) +{ + orte_std_cntr_t i, num_counters, num_named_trigs; + orte_std_cntr_t zero=0; + int rc, num_start_routing; + orte_gpr_value_t *value; + char* keys[] = { + /* changes to this ordering need to be reflected in code below */ + /* We need to set up counters for all the defined ORTE process states, even though + * the launch system doesn't actually use them all. This must be done so that + * user-defined callbacks can be generated - otherwise, they won't happen! 
+ */ + ORTE_PROC_NUM_AT_INIT, + ORTE_PROC_NUM_LAUNCHED, + ORTE_PROC_NUM_RUNNING, + ORTE_PROC_NUM_TERMINATED, + /* the following stage gates need data routed through them */ + ORTE_PROC_NUM_AT_STG1, + ORTE_PROC_NUM_AT_STG2, + ORTE_PROC_NUM_AT_STG3, + ORTE_PROC_NUM_FINALIZED + }; + char* trig_names[] = { + /* changes to this ordering need to be reflected in code below */ + ORTE_ALL_INIT_TRIGGER, + ORTE_ALL_LAUNCHED_TRIGGER, + ORTE_ALL_RUNNING_TRIGGER, + ORTE_NUM_TERMINATED_TRIGGER, + /* the following triggers need data routed through them */ + ORTE_STG1_TRIGGER, + ORTE_STG2_TRIGGER, + ORTE_STG3_TRIGGER, + ORTE_NUM_FINALIZED_TRIGGER, + }; + char *segment, *trig_name, *tokens[2], *trig_keys[2]; + orte_gpr_trigger_id_t id; + orte_gpr_trigger_action_t trig_mode, trig_mode_routed; + + num_counters = sizeof(keys)/sizeof(keys[0]); + num_named_trigs= sizeof(trig_names)/sizeof(trig_names[0]); + num_start_routing = 4; + + if (ORTE_SUCCESS != (rc = orte_schema.get_job_segment_name(&segment, job))) { + ORTE_ERROR_LOG(rc); + return rc; + } + + /* setup the counters */ + if (ORTE_SUCCESS != (rc = orte_gpr.create_value(&value, + ORTE_GPR_OVERWRITE | ORTE_GPR_TOKENS_XAND | ORTE_GPR_KEYS_OR, + segment, num_counters, 1))) { + + ORTE_ERROR_LOG(rc); + return rc; + } + + value->tokens[0] = strdup(ORTE_JOB_GLOBALS); /* put counters in the job's globals container */ + + for (i=0; i < num_counters; i++) { + if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[i]), keys[i], ORTE_STD_CNTR, &zero))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(value); + return rc; + } + } + + /* put the counters on the registry */ + if (ORTE_SUCCESS != (rc = orte_gpr.put(1, &value))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(value); + return rc; + } + OBJ_RELEASE(value); + + /*** DEFINE STAGE GATE STANDARD TRIGGERS ***/ + /* The standard triggers will return the trigger counters so that we + * can get required information for notifying processes. Other + * subscriptions will then attach to them. 
+ */ + tokens[0] = strdup(ORTE_JOB_GLOBALS); + tokens[1] = NULL; + + trig_keys[0] = strdup(ORTE_JOB_SLOTS_KEY); + trig_mode = ORTE_GPR_TRIG_INCLUDE_TRIG_CNTRS | ORTE_GPR_TRIG_ONE_SHOT | + ORTE_GPR_TRIG_CMP_LEVELS; + trig_mode_routed = trig_mode | ORTE_GPR_TRIG_ROUTE_DATA_THRU_ME; + + for (i=0; i < num_named_trigs; i++) { + trig_keys[1] = strdup(keys[i]); + if (ORTE_SUCCESS != (rc = orte_schema.get_std_trigger_name(&trig_name, + trig_names[i], job))) { + ORTE_ERROR_LOG(rc); + free(tokens[0]); + free(segment); + free(trig_keys[0]); + free(trig_keys[1]); + return rc; + } + if (i < num_start_routing) { + /* the first set of triggers do NOT have anything routed to them. + * They are setup here strictly for users to attach to them. + * Hence, we do not pass a trigger callback function and + * leave the trig action set so as not to route data through me + */ + if (ORTE_SUCCESS != (rc = orte_gpr.define_trigger(&id, trig_name, trig_mode, + ORTE_GPR_TOKENS_XAND | ORTE_GPR_KEYS_OR, + segment, tokens, 2, trig_keys, + NULL, NULL))) { + ORTE_ERROR_LOG(rc); + free(tokens[0]); + free(segment); + free(trig_name); + free(trig_keys[0]); + free(trig_keys[1]); + return rc; + } + } else { + if (ORTE_SUCCESS != (rc = orte_gpr.define_trigger(&id, trig_name, trig_mode_routed, + ORTE_GPR_TOKENS_XAND | ORTE_GPR_KEYS_OR, + segment, tokens, 2, trig_keys, + cbfunc, user_tag))) { + ORTE_ERROR_LOG(rc); + free(tokens[0]); + free(segment); + free(trig_name); + free(trig_keys[0]); + free(trig_keys[1]); + return rc; + } + } + free(trig_name); + free(trig_keys[1]); + } + free(trig_keys[0]); + free(tokens[0]); + free(segment); + + return ORTE_SUCCESS; +} + + +/* + * Setup orted-specific stage gates + * setup the orted trigger to fire when the specified number of orteds have been launched + */ +int orte_smr_base_init_orted_stage_gates(orte_jobid_t job, + orte_std_cntr_t num_orteds, + orte_gpr_trigger_cb_fn_t cbfunc, + void *user_tag) +{ + char *segment; + char *trig_name; + orte_gpr_value_t *value; +
orte_std_cntr_t zero=0; + char *trig_tokens[] = { + ORTE_JOB_GLOBALS, + NULL + }; + char *trig_keys[] = { + ORTED_NUM_TO_BE_LAUNCHED, + ORTED_LAUNCH_STAGE_GATE_CNTR, + NULL + }; + int rc; + orte_gpr_trigger_id_t id; + + /** get the segment name where all of this is taking place */ + if (ORTE_SUCCESS != (rc = orte_schema.get_job_segment_name(&segment, job))) { + ORTE_ERROR_LOG(rc); + return rc; + } + + /* setup to store two values - the number of orteds and the counter */ + if (ORTE_SUCCESS != (rc = orte_gpr.create_value(&value, + ORTE_GPR_OVERWRITE | ORTE_GPR_TOKENS_XAND | ORTE_GPR_KEYS_OR, + segment, 2, 1))) { + + ORTE_ERROR_LOG(rc); + free(segment); + return rc; + } + value->tokens[0] = strdup(ORTE_JOB_GLOBALS); /* put counters in the job's globals container */ + + /** store the number of orteds */ + if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[0]), + ORTED_NUM_TO_BE_LAUNCHED, ORTE_STD_CNTR, &num_orteds))) { + ORTE_ERROR_LOG(rc); + free(segment); + OBJ_RELEASE(value); + return rc; + } + /** initialize the counter to zero */ + if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[1]), ORTED_LAUNCH_STAGE_GATE_CNTR, ORTE_STD_CNTR, &zero))) { + ORTE_ERROR_LOG(rc); + free(segment); + OBJ_RELEASE(value); + return rc; + } + + /* put the counters on the registry */ + if (ORTE_SUCCESS != (rc = orte_gpr.put(1, &value))) { + ORTE_ERROR_LOG(rc); + free(segment); + OBJ_RELEASE(value); + return rc; + } + OBJ_RELEASE(value); + + /* now define a trigger based on those counters */ + if (ORTE_SUCCESS != (rc = orte_schema.get_std_trigger_name(&trig_name, + ORTED_LAUNCH_STAGE_GATE_TRIGGER, job))) { + ORTE_ERROR_LOG(rc); + free(segment); + return rc; + } + if (ORTE_SUCCESS != (rc = orte_gpr.define_trigger(&id, trig_name, + ORTE_GPR_TRIG_INCLUDE_TRIG_CNTRS | ORTE_GPR_TRIG_ONE_SHOT | + ORTE_GPR_TRIG_CMP_LEVELS, + ORTE_GPR_TOKENS_XAND | ORTE_GPR_KEYS_OR, + segment, trig_tokens, 2, trig_keys, + cbfunc, user_tag))) { + ORTE_ERROR_LOG(rc); + free(segment); 
+ free(trig_name); + return rc; + } + + free(segment); + free(trig_name); + return ORTE_SUCCESS; +} + + +/* + * Setup an alert monitor + */ +int orte_smr_base_define_alert_monitor(orte_jobid_t job, + char *trigger_name, + char *counter_key, + orte_std_cntr_t init_value, + orte_std_cntr_t alert_value, + bool one_shot, + orte_gpr_trigger_cb_fn_t cbfunc, + void *user_tag) +{ + int rc; + orte_gpr_value_t *value; + orte_gpr_trigger_action_t trig_mode; + char *segment, *trig_name; + char *tokens[] = { + ORTE_JOB_GLOBALS, + NULL + }; + orte_gpr_trigger_id_t id; + + /* get the job's segment name */ + if (ORTE_SUCCESS != (rc = orte_schema.get_job_segment_name(&segment, job))) { + ORTE_ERROR_LOG(rc); + return rc; + } + + /* setup the counters */ + if (ORTE_SUCCESS != (rc = orte_gpr.create_value(&value, + ORTE_GPR_OVERWRITE | ORTE_GPR_TOKENS_XAND | ORTE_GPR_KEYS_OR, + segment, 1, 1))) { + + ORTE_ERROR_LOG(rc); + free(segment); + return rc; + } + value->tokens[0] = strdup(ORTE_JOB_GLOBALS); /* put counters in the job's globals container */ + + if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[0]), counter_key, ORTE_STD_CNTR, &init_value))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(value); + return rc; + } + + /* put the counter on the registry */ + if (ORTE_SUCCESS != (rc = orte_gpr.put(1, &value))) { + ORTE_ERROR_LOG(rc); + OBJ_RELEASE(value); + free(segment); + return rc; + } + OBJ_RELEASE(value); + + + /* define the trigger to fire at specified level */ + if (ORTE_SUCCESS != (rc = orte_schema.get_std_trigger_name(&trig_name, + trigger_name, job))) { + ORTE_ERROR_LOG(rc); + free(segment); + return rc; + } + trig_mode = ORTE_GPR_TRIG_INCLUDE_TRIG_CNTRS | ORTE_GPR_TRIG_AT_LEVEL; + if (one_shot) { + trig_mode = trig_mode | ORTE_GPR_TRIG_ONE_SHOT; + } + + if (ORTE_SUCCESS != (rc = orte_gpr.define_trigger_level(&id, trig_name, + trig_mode, + ORTE_GPR_TOKENS_XAND | ORTE_GPR_KEYS_OR, + segment, tokens, 1, &counter_key, &alert_value, + cbfunc, user_tag))) { + 
ORTE_ERROR_LOG(rc); + free(segment); + free(trig_name); + return rc; + } + free(segment); + free(trig_name); + + return ORTE_SUCCESS; +} + + +/* + * Routine that tools such as orterun can use to subscribe + * to events on all counters. + */ + +int orte_smr_base_job_stage_gate_subscribe(orte_jobid_t job, + orte_gpr_notify_cb_fn_t cbfunc, void* cbdata, + orte_proc_state_t cb_conditions) +{ + orte_std_cntr_t i; + int rc; + char *segment, *trig_name, *tokens[2]; + orte_proc_state_t conditions; + orte_gpr_subscription_id_t id; + /** the order of the next three definitions MUST match */ + orte_proc_state_t state[] = { + ORTE_PROC_STATE_INIT, + ORTE_PROC_STATE_LAUNCHED, + ORTE_PROC_STATE_RUNNING, + ORTE_PROC_STATE_AT_STG1, + ORTE_PROC_STATE_AT_STG2, + ORTE_PROC_STATE_AT_STG3, + ORTE_PROC_STATE_FINALIZED, + ORTE_PROC_STATE_TERMINATED + }; + char* keys[] = { + ORTE_PROC_NUM_AT_INIT, + ORTE_PROC_NUM_LAUNCHED, + ORTE_PROC_NUM_RUNNING, + ORTE_PROC_NUM_AT_STG1, + ORTE_PROC_NUM_AT_STG2, + ORTE_PROC_NUM_AT_STG3, + ORTE_PROC_NUM_FINALIZED, + ORTE_PROC_NUM_TERMINATED + }; + char* trig_names[] = { + ORTE_ALL_INIT_TRIGGER, + ORTE_ALL_LAUNCHED_TRIGGER, + ORTE_ALL_RUNNING_TRIGGER, + ORTE_STG1_TRIGGER, + ORTE_STG2_TRIGGER, + ORTE_STG3_TRIGGER, + ORTE_NUM_FINALIZED_TRIGGER, + ORTE_NUM_TERMINATED_TRIGGER + }; + orte_std_cntr_t num_counters = sizeof(keys)/sizeof(keys[0]); + + /* identify the segment for this job */ + if (ORTE_SUCCESS != (rc = orte_schema.get_job_segment_name(&segment, job))) { + ORTE_ERROR_LOG(rc); + return rc; + } + + /* setup the tokens */ + tokens[0]=ORTE_JOB_GLOBALS; + tokens[1]=NULL; + + conditions = cb_conditions; + for (i=0; i < num_counters; i++) { + if (state[i] & conditions) { + /** want this one - attach ourselves to the appropriate standard trigger */ + if (ORTE_SUCCESS != + (rc = orte_schema.get_std_trigger_name(&trig_name, trig_names[i], job))) { + ORTE_ERROR_LOG(rc); + free(segment); + return rc; + } + + if (ORTE_SUCCESS != (rc = orte_gpr.subscribe_1(&id, 
trig_name, NULL, + ORTE_GPR_NOTIFY_DELETE_AFTER_TRIG, + ORTE_GPR_TOKENS_OR | ORTE_GPR_KEYS_OR, + segment, tokens, keys[i], + cbfunc, cbdata))) { + ORTE_ERROR_LOG(rc); + free(segment); + free(trig_name); + return rc; + } + free(trig_name); + /* clear the spot in the conditions so we can check that all were found */ + conditions = conditions & ~(state[i]); + } + } + free(segment); + + return ORTE_SUCCESS; +} + + diff --git a/orte/mca/smr/base/smr_private.h b/orte/mca/smr/base/smr_private.h index 5a68e9663b..f0c0f479b2 100644 --- a/orte/mca/smr/base/smr_private.h +++ b/orte/mca/smr/base/smr_private.h @@ -65,6 +65,28 @@ int orte_smr_base_get_job_state(orte_job_state_t *state, int orte_smr_base_set_job_state(orte_jobid_t jobid, orte_job_state_t state); +int orte_smr_base_init_job_stage_gates(orte_jobid_t job, + orte_gpr_trigger_cb_fn_t cbfunc, + void *user_tag); + +int orte_smr_base_init_orted_stage_gates(orte_jobid_t job, + orte_std_cntr_t num_orteds, + orte_gpr_trigger_cb_fn_t cbfunc, + void *user_tag); + +int orte_smr_base_define_alert_monitor(orte_jobid_t job, + char *trigger_name, + char *counter_key, + orte_std_cntr_t init_value, + orte_std_cntr_t alert_value, + bool one_shot, + orte_gpr_trigger_cb_fn_t cbfunc, + void *user_tag); + +int orte_smr_base_job_stage_gate_subscribe(orte_jobid_t job, + orte_gpr_notify_cb_fn_t cbfunc, void* cbdata, + orte_proc_state_t cb_conditions); + int orte_smr_base_begin_monitoring_not_available(orte_jobid_t job); diff --git a/orte/mca/smr/bproc/smr_bproc.c b/orte/mca/smr/bproc/smr_bproc.c index 0939b221df..7133b1da3f 100644 --- a/orte/mca/smr/bproc/smr_bproc.c +++ b/orte/mca/smr/bproc/smr_bproc.c @@ -364,6 +364,10 @@ orte_smr_base_module_t orte_smr_bproc_module = { orte_smr_base_get_job_state, orte_smr_base_set_job_state, orte_smr_base_begin_monitoring_not_available, + orte_smr_base_init_job_stage_gates, + orte_smr_base_init_orted_stage_gates, + orte_smr_base_define_alert_monitor, + orte_smr_base_job_stage_gate_subscribe, 
orte_smr_bproc_finalize }; diff --git a/orte/mca/smr/smr.h b/orte/mca/smr/smr.h index 905542bd46..0851d29f9f 100644 --- a/orte/mca/smr/smr.h +++ b/orte/mca/smr/smr.h @@ -33,6 +33,8 @@ #include "orte/orte_types.h" #include "opal/mca/mca.h" + +#include "orte/mca/gpr/gpr_types.h" #include "orte/mca/ns/ns_types.h" #include "orte/mca/smr/smr_types.h" @@ -83,6 +85,63 @@ typedef int (*orte_smr_base_module_get_job_state_fn_t)(orte_job_state_t *state, typedef int (*orte_smr_base_module_set_job_state_fn_t)(orte_jobid_t jobid, orte_job_state_t state); +/* + * Define the job-specific standard stage gates + * This function creates all of the ORTE-standard stage gates. + */ +typedef int (*orte_smr_base_module_job_stage_gate_init_fn_t)(orte_jobid_t job, + orte_gpr_trigger_cb_fn_t cbfunc, + void *user_tag); + +/* + * Define the orted standard stage gates + * This function creates all of the orted-standard stage gates. + */ +typedef int (*orte_smr_base_module_orted_stage_gate_init_fn_t)(orte_jobid_t job, + orte_std_cntr_t num_orteds, + orte_gpr_trigger_cb_fn_t cbfunc, + void *user_tag); + +/* + * Define an "alert" monitor + * This function will establish an appropriate trigger to notify the specified + * callback function when an event takes place. In this case, event is defined + * by the specified memory location achieving the specified value - e.g., a + * location could be monitored for a value being set to 1, indicating that a + * process has aborted. + * + * @param job The job that is to be monitored. + * + * @param *trigger_name The name of the trigger to be defined. + * + * @param *counter_key A string defining the key name of the counter on the registry. + * + * @param counter The initial value (an orte_std_cntr_t, passed by value) to which + * the counter should be set. + * + * @param alert_value The value (an orte_std_cntr_t, passed by value) of
+ * + * @param one_shot Whether or not the trigger should be a one-shot + * + * @param cbfunc A registry callback function to be called when the alert fires. + * + * @param *user_tag Whatever data the user would like to have passed back to them + * when the alert is received + * + * NOTE: alerts are intended solely for purposes of alerting the caller when + * an event happens. Thus, they do not convey any information beyond the fact that + * they fired. + */ +typedef int (*orte_smr_base_module_define_alert_monitor_fn_t)(orte_jobid_t job, + char *trigger_name, + char *counter_key, + orte_std_cntr_t counter, + orte_std_cntr_t alert_value, + bool one_shot, + orte_gpr_trigger_cb_fn_t cbfunc, + void *user_tag); + /* * Initiate monitoring of a job * This function notifies the smr that it should initiate monitoring of the specified @@ -93,6 +152,14 @@ typedef int (*orte_smr_base_module_set_job_state_fn_t)(orte_jobid_t jobid, */ typedef int (*orte_smr_base_module_begin_monitoring_fn_t)(orte_jobid_t job); +/* + * Subscribe to a job stage gate + */ +typedef int (*orte_smr_base_module_job_stage_gate_subscribe_fn_t)(orte_jobid_t job, + orte_gpr_notify_cb_fn_t cbfunc, void* cbdata, + orte_proc_state_t cb_conditions); + + /* Shutdown the module nicely */ @@ -106,14 +173,19 @@ typedef int (*orte_smr_base_module_finalize_fn_t)(void); * Ver 1.3.0 */ struct orte_smr_base_module_1_3_0_t { - orte_smr_base_module_get_proc_state_fn_t get_proc_state; - orte_smr_base_module_set_proc_state_fn_t set_proc_state; - orte_smr_base_module_get_node_state_fn_t get_node_state; - orte_smr_base_module_set_node_state_fn_t set_node_state; - orte_smr_base_module_get_job_state_fn_t get_job_state; - orte_smr_base_module_set_job_state_fn_t set_job_state; - orte_smr_base_module_begin_monitoring_fn_t begin_monitoring_job; - orte_smr_base_module_finalize_fn_t finalize; + orte_smr_base_module_get_proc_state_fn_t get_proc_state; + orte_smr_base_module_set_proc_state_fn_t set_proc_state; + 
orte_smr_base_module_get_node_state_fn_t get_node_state; + orte_smr_base_module_set_node_state_fn_t set_node_state; + orte_smr_base_module_get_job_state_fn_t get_job_state; + orte_smr_base_module_set_job_state_fn_t set_job_state; + orte_smr_base_module_begin_monitoring_fn_t begin_monitoring_job; + /* TRIGGER INIT FUNCTIONS */ + orte_smr_base_module_job_stage_gate_init_fn_t init_job_stage_gates; + orte_smr_base_module_orted_stage_gate_init_fn_t init_orted_stage_gates; + orte_smr_base_module_define_alert_monitor_fn_t define_alert_monitor; + orte_smr_base_module_job_stage_gate_subscribe_fn_t job_stage_gate_subscribe; + orte_smr_base_module_finalize_fn_t finalize; }; typedef struct orte_smr_base_module_1_3_0_t orte_smr_base_module_1_3_0_t; diff --git a/orte/mca/smr/smr_types.h b/orte/mca/smr/smr_types.h index 360507fbc3..8105d5e496 100644 --- a/orte/mca/smr/smr_types.h +++ b/orte/mca/smr/smr_types.h @@ -34,20 +34,22 @@ typedef int orte_exit_code_t; typedef uint16_t orte_proc_state_t; #define ORTE_PROC_STATE_T ORTE_UINT16 -#define ORTE_PROC_STATE_INIT 0x0001 /* process entry has been created by rmaps */ -#define ORTE_PROC_STATE_LAUNCHED 0x0002 /* process has been launched by pls */ -#define ORTE_PROC_STATE_AT_STG1 0x0004 /* process is at Stage Gate 1 barrier in orte_init */ -#define ORTE_PROC_STATE_AT_STG2 0x0008 /* process is at Stage Gate 2 barrier in orte_init */ -#define ORTE_PROC_STATE_RUNNING 0x0010 /* process has exited orte_init and is running */ -#define ORTE_PROC_STATE_AT_STG3 0x0020 /* process is at Stage Gate 3 barrier in orte_finalize */ -#define ORTE_PROC_STATE_FINALIZED 0x0040 /* process has completed orte_finalize and is running */ -#define ORTE_PROC_STATE_TERMINATED 0x0080 /* process has terminated and is no longer running */ -#define ORTE_PROC_STATE_ABORTED 0x0100 /* process aborted */ +#define ORTE_PROC_STATE_INIT 0x0001 /* process entry has been created by rmaps */ +#define ORTE_PROC_STATE_LAUNCHED 0x0002 /* process has been launched by pls */ 
+#define ORTE_PROC_STATE_AT_STG1 0x0004 /* process is at Stage Gate 1 barrier in orte_init */ +#define ORTE_PROC_STATE_AT_STG2 0x0008 /* process is at Stage Gate 2 barrier in orte_init */ +#define ORTE_PROC_STATE_RUNNING 0x0010 /* process has exited orte_init and is running */ +#define ORTE_PROC_STATE_AT_STG3 0x0020 /* process is at Stage Gate 3 barrier in orte_finalize */ +#define ORTE_PROC_STATE_FINALIZED 0x0040 /* process has completed orte_finalize and is running */ +#define ORTE_PROC_STATE_TERMINATED 0x0080 /* process has terminated and is no longer running */ +#define ORTE_PROC_STATE_ABORTED 0x0100 /* process aborted */ +#define ORTE_PROC_STATE_FAILED_TO_START 0x0200 /* process failed to start */ +/* this process has been ordered to "die", but may not have completed it yet. Don't tell it again */ +#define ORTE_PROC_STATE_ABORT_ORDERED 0x0400 /** define some common shorthands for when we want to be alerted */ #define ORTE_PROC_STATE_ALL 0xffff /* alert on ALL triggers */ #define ORTE_PROC_STAGE_GATES_ONLY ORTE_PROC_STATE_AT_STG1 | ORTE_PROC_STATE_AT_STG2 | ORTE_PROC_STATE_AT_STG3 | ORTE_PROC_STATE_FINALIZED -#define ORTE_PROC_STATE_TERMINATION ORTE_PROC_STATE_TERMINATED | ORTE_PROC_STATE_ABORTED #define ORTE_PROC_STATE_NONE 0x0000 /* don't alert on any triggers */ /* @@ -57,15 +59,18 @@ typedef uint16_t orte_proc_state_t; typedef uint16_t orte_job_state_t; #define ORTE_JOB_STATE_T ORTE_UINT16 -#define ORTE_JOB_STATE_INIT 0x0001 /* job entry has been created by rmaps */ -#define ORTE_JOB_STATE_LAUNCHED 0x0002 /* job has been launched by pls */ -#define ORTE_JOB_STATE_AT_STG1 0x0004 /* all processes are at Stage Gate 1 barrier in orte_init */ -#define ORTE_JOB_STATE_AT_STG2 0x0008 /* all processes are at Stage Gate 2 barrier in orte_init */ -#define ORTE_JOB_STATE_RUNNING 0x0010 /* all processes have exited orte_init and is running */ -#define ORTE_JOB_STATE_AT_STG3 0x0020 /* all processes are at Stage Gate 3 barrier in orte_finalize */ -#define 
ORTE_JOB_STATE_FINALIZED 0x0040 /* all processes have completed orte_finalize and is running */ -#define ORTE_JOB_STATE_TERMINATED 0x0080 /* all processes have terminated and is no longer running */ -#define ORTE_JOB_STATE_ABORTED 0x0100 /* at least one process aborted, causing job to abort */ +#define ORTE_JOB_STATE_INIT 0x0001 /* job entry has been created by rmaps */ +#define ORTE_JOB_STATE_LAUNCHED 0x0002 /* job has been launched by pls */ +#define ORTE_JOB_STATE_AT_STG1 0x0004 /* all processes are at Stage Gate 1 barrier in orte_init */ +#define ORTE_JOB_STATE_AT_STG2 0x0008 /* all processes are at Stage Gate 2 barrier in orte_init */ +#define ORTE_JOB_STATE_RUNNING 0x0010 /* all processes have exited orte_init and is running */ +#define ORTE_JOB_STATE_AT_STG3 0x0020 /* all processes are at Stage Gate 3 barrier in orte_finalize */ +#define ORTE_JOB_STATE_FINALIZED 0x0040 /* all processes have completed orte_finalize and is running */ +#define ORTE_JOB_STATE_TERMINATED 0x0080 /* all processes have terminated and is no longer running */ +#define ORTE_JOB_STATE_ABORTED 0x0100 /* at least one process aborted, causing job to abort */ +#define ORTE_JOB_STATE_FAILED_TO_START 0x0200 /* at least one process failed to start */ +/* the processes in this job have been ordered to "die", but may not have completed it yet. 
Don't order it again */ +#define ORTE_JOB_STATE_ABORT_ORDERED 0x0400 /** * Node State, corresponding to the ORTE_NODE_STATE_* #defines, diff --git a/orte/runtime/orte_abort.c b/orte/runtime/orte_abort.c index eb797ea6bd..67fd6e7d78 100644 --- a/orte/runtime/orte_abort.c +++ b/orte/runtime/orte_abort.c @@ -17,36 +17,59 @@ */ #include "orte_config.h" +#include "orte/orte_constants.h" #include -#include -#include +#ifdef HAVE_FCNTL_H +#include +#endif +#ifdef HAVE_UNISTD_H +#include +#endif -#include "orte/orte_constants.h" -#include "orte/runtime/runtime.h" #include "opal/util/output.h" #include "opal/runtime/opal_progress.h" #include "opal/event/event.h" +#include "opal/util/os_path.h" + +#include "orte/runtime/runtime.h" #include "orte/util/session_dir.h" #include "orte/util/sys_info.h" +#include "orte/mca/errmgr/errmgr.h" -int orte_abort(int status, char *fmt, ...) +/* + * We do NOT call the regular C-library "abort" function, even + * though that would have alerted us to the fact that this is + * an abnormal termination, because it would automatically cause + * a core file to be generated. On large systems, that can be + * overwhelming (imagine a few thousand Gbyte-sized files hitting + * a shared file system simultaneously...ouch!). + * + * However, this causes a problem for OpenRTE as the system truly + * needs to know that this actually IS an abnormal termination. + * To get around the problem, we create a file in the session + * directory - we don't need to put anything in it, though, as its + * very existence simply alerts us that this was an abnormal + * termination. + * + * The session directory finalize system will clean this file up + * for us automagically. However, it needs to stick around long + * enough for our local daemon to find it! So, we do NOT call + * session_dir_finalize here!!! Someone will clean up for us. + * + * In some cases, however, we DON'T want to create that alert. 
For + * example, if an orted detects that the HNP has died, then there + * is truly nobody to alert! In these cases, we pass report=false + * to prevent the abort file from being created. This allows the + * session directory tree to cleanly be eliminated. + */ +int orte_abort(int status, bool report) { - va_list arglist; + char *abort_file; + int fd; - /* If there was a message, output it */ - - va_start(arglist, fmt); - if( NULL != fmt ) { - char* buffer = NULL; - vasprintf( &buffer, fmt, arglist ); - opal_output( 0, buffer ); - free( buffer ); - } - va_end(arglist); - - /* Exit - do NOT do normal finalize as this will very likely + /* Exit - do NOT do a normal finalize as this will very likely * hang the process. We are aborting due to an abnormal condition * that precludes normal cleanup * @@ -55,9 +78,21 @@ int orte_abort(int status, char *fmt, ...) * - Assume errmgr cleans up child processes before we exit. */ - /* - Clean up session directory */ - orte_session_dir_finalize(orte_process_info.my_name); - + /* If we were asked to report this termination, + * write an "abort" file into our session directory + */ + if (report) { + abort_file = opal_os_path(false, orte_process_info.proc_session_dir, "abort", NULL); + if (NULL == abort_file) { + /* got a problem */ + ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); + goto CLEANUP; + } + fd = open(abort_file, O_CREAT); + if (0 < fd) close(fd); + } + +CLEANUP: /* - Clean out the global structures * (not really necessary, but good practice) */ orte_sys_info_finalize(); diff --git a/orte/runtime/orte_init_stage1.c b/orte/runtime/orte_init_stage1.c index 4cd554c1e8..032f44b3a7 100644 --- a/orte/runtime/orte_init_stage1.c +++ b/orte/runtime/orte_init_stage1.c @@ -47,9 +47,11 @@ #include "orte/mca/sds/base/base.h" #include "orte/mca/gpr/base/base.h" #include "orte/mca/ras/base/base.h" -#include "orte/mca/ras/base/ras_base_node.h" #include "orte/mca/rds/base/base.h" +#include "orte/mca/pls/base/base.h" #include 
"orte/mca/rmgr/base/base.h" +#include "orte/mca/odls/base/base.h" + #include "orte/mca/rmaps/base/base.h" #include "orte/mca/schema/base/base.h" #include "orte/mca/smr/base/base.h" @@ -59,6 +61,10 @@ #include "orte/util/sys_info.h" #include "orte/util/universe_setup_file_io.h" +/* these are to be cleaned up for 2.0 */ +#include "orte/mca/ras/base/ras_private.h" +#include "orte/mca/rmgr/base/rmgr_private.h" + #include "orte/runtime/runtime.h" #include "orte/runtime/runtime_internal.h" #include "orte/runtime/orte_wait.h" @@ -265,6 +271,10 @@ int orte_init_stage1(bool infrastructure) free(orte_universe_info.seed_uri); } orte_universe_info.seed_uri = orte_rml.get_uri(); + /* and make sure that the daemon flag is NOT set so that + * components unique to non-HNP orteds can be selected + */ + orte_process_info.daemon = false; } /* setup my session directory */ @@ -346,9 +356,72 @@ int orte_init_stage1(bool infrastructure) } /* - * setup the resource manager + * Now that we know for certain if we are an HNP and/or a daemon, + * setup the resource management frameworks. This includes opening + * and selecting the daemon launch framework - that framework "knows" + * what to do if it isn't in a daemon, and everyone needs that framework + * to at least register its datatypes. 
*/ - + if (ORTE_SUCCESS != (ret = orte_rds_base_open())) { + ORTE_ERROR_LOG(ret); + error = "orte_rds_base_open"; + goto error; + } + + if (ORTE_SUCCESS != (ret = orte_rds_base_select())) { + ORTE_ERROR_LOG(ret); + error = "orte_rds_base_select"; + goto error; + } + + if (ORTE_SUCCESS != (ret = orte_ras_base_open())) { + ORTE_ERROR_LOG(ret); + error = "orte_ras_base_open"; + goto error; + } + + if (ORTE_SUCCESS != (ret = orte_ras_base_find_available())) { + ORTE_ERROR_LOG(ret); + error = "orte_ras_base_find_available"; + goto error; + } + + if (ORTE_SUCCESS != (ret = orte_rmaps_base_open())) { + ORTE_ERROR_LOG(ret); + error = "orte_rmaps_base_open"; + goto error; + } + + if (ORTE_SUCCESS != (ret = orte_rmaps_base_find_available())) { + ORTE_ERROR_LOG(ret); + error = "orte_rmaps_base_find_available"; + goto error; + } + + if (ORTE_SUCCESS != (ret = orte_pls_base_open())) { + ORTE_ERROR_LOG(ret); + error = "orte_pls_base_open"; + goto error; + } + + if (ORTE_SUCCESS != (ret = orte_pls_base_select())) { + ORTE_ERROR_LOG(ret); + error = "orte_pls_base_select"; + goto error; + } + + if (ORTE_SUCCESS != (ret = orte_odls_base_open())) { + ORTE_ERROR_LOG(ret); + error = "orte_odls_base_open"; + goto error; + } + + if (ORTE_SUCCESS != (ret = orte_odls_base_select())) { + ORTE_ERROR_LOG(ret); + error = "orte_odls_base_select"; + goto error; + } + if (ORTE_SUCCESS != (ret = orte_rmgr_base_open())) { ORTE_ERROR_LOG(ret); error = "orte_rmgr_base_open"; @@ -362,7 +435,7 @@ int orte_init_stage1(bool infrastructure) } /* - * setup the state-of-health monitor + * setup the state monitor */ if (ORTE_SUCCESS != (ret = orte_smr_base_open())) { ORTE_ERROR_LOG(ret); @@ -376,7 +449,16 @@ int orte_init_stage1(bool infrastructure) goto error; } - /* if we are a singleton or the seed, setup the infrastructure for our job */ + /* + * setup the errmgr + */ + if (ORTE_SUCCESS != (ret = orte_errmgr_base_select())) { + ORTE_ERROR_LOG(ret); + error = "orte_errmgr_base_select"; + goto error; + } + +
/* if we are a singleton or the seed, setup the infrastructure for our job */ if(orte_process_info.singleton || orte_process_info.seed) { char *site, *resource; @@ -430,7 +512,6 @@ int orte_init_stage1(bool infrastructure) orte_rds_cell_desc_t *rds_item; orte_rds_cell_attr_t *new_attr; orte_ras_node_t *ras_item; - orte_ras_base_module_t *module; OBJ_CONSTRUCT(&single_host, opal_list_t); OBJ_CONSTRUCT(&rds_single_host, opal_list_t); @@ -502,7 +583,7 @@ int orte_init_stage1(bool infrastructure) opal_list_append(&rds_single_host, &rds_item->super); /* Store into registry */ - ret = orte_rds_base_store_resource(&rds_single_host); + ret = orte_rds.store_resource(&rds_single_host); if (ORTE_SUCCESS != ret ) { ORTE_ERROR_LOG(ret); error = "orte_rds.store_resource"; @@ -528,12 +609,7 @@ int orte_init_stage1(bool infrastructure) you'll end up with the localhost *and* all the other nodes in your allocation on the node segment -- which is probably fine) */ - orte_ras_base_allocate(my_jobid, &module); - if (NULL == module) { - error = "orte_ras NULL module"; - goto error; - } - orte_ras = *module; + orte_ras.allocate_job(my_jobid); OBJ_DESTRUCT(&single_host); OBJ_DESTRUCT(&rds_single_host); @@ -545,9 +621,9 @@ int orte_init_stage1(bool infrastructure) error = "orte_rmgr_base_set_job_slots"; goto error; } - if (ORTE_SUCCESS != (ret = orte_rmaps_base_set_vpid_range(my_jobid,0,1))) { + if (ORTE_SUCCESS != (ret = orte_rmgr.set_vpid_range(my_jobid,0,1))) { ORTE_ERROR_LOG(ret); - error = "orte_rmaps_base_set_vpid_range"; + error = "orte_rmgr.set_vpid_range"; goto error; } if (ORTE_SUCCESS != (ret = orte_rmgr_base_proc_stage_gate_init(my_jobid))) { diff --git a/orte/runtime/orte_system_finalize.c b/orte/runtime/orte_system_finalize.c index 41ec41c60b..4f499d5fab 100644 --- a/orte/runtime/orte_system_finalize.c +++ b/orte/runtime/orte_system_finalize.c @@ -33,9 +33,14 @@ #include "orte/mca/ns/base/base.h" #include "orte/mca/gpr/base/base.h" #include "orte/mca/errmgr/base/base.h" 
+#include "orte/mca/rds/base/base.h" +#include "orte/mca/ras/base/base.h" +#include "orte/mca/rmaps/base/base.h" +#include "orte/mca/pls/base/base.h" #include "orte/mca/schema/base/base.h" #include "orte/mca/iof/base/base.h" #include "orte/mca/rmgr/base/base.h" +#include "orte/mca/odls/base/base.h" #include "orte/util/session_dir.h" #include "orte/util/sys_info.h" #include "orte/util/proc_info.h" @@ -61,17 +66,31 @@ int orte_system_finalize(void) free(contact_path); } - /* rmgr close depends on wait/iof */ + /* rmgr and odls close depend on wait/iof */ orte_rmgr_base_close(); + orte_odls_base_close(); orte_wait_finalize(); orte_iof_base_close(); orte_ns_base_close(); orte_gpr_base_close(); orte_schema_base_close(); + + /* finalize selected modules so they can de-register + * their receives + */ + orte_rds_base_close(); + orte_ras_base_close(); + orte_rmaps_base_close(); + orte_pls_base_close(); + /* the errmgr close function retains the base + * module so that error logging can continue + */ + orte_errmgr_base_close(); + + /* now can close the rml */ orte_rml_base_close(); orte_dss_close(); - orte_errmgr_base_close(); opal_progress_finalize(); diff --git a/orte/runtime/runtime.h b/orte/runtime/runtime.h index b05590dcf2..ccc686fbbd 100644 --- a/orte/runtime/runtime.h +++ b/orte/runtime/runtime.h @@ -74,13 +74,17 @@ extern "C" { ORTE_DECLSPEC extern int orte_debug_flag; /** - * Abort the current application with a pretty-print error message + * Abort the current application * - * Aborts currently running application with \code abort(), pretty - * printing an error message if possible. Error message should be - * specified using the standard \code printf() format. + * Aborts currently running application, NOTE: We do NOT call the + * regular C-library "abort" function, even + * though that would have alerted us to the fact that this is + * an abnormal termination, because it would automatically cause + * a core file to be generated. 
The "report" flag indicates if the + * function should create an appropriate file to alert the local + * orted that termination was abnormal. */ -ORTE_DECLSPEC int orte_abort(int status, char *fmt, ...); +ORTE_DECLSPEC int orte_abort(int status, bool report); /** diff --git a/orte/test/mpi/Makefile b/orte/test/mpi/Makefile index fb9af36917..57e3cdc67d 100644 --- a/orte/test/mpi/Makefile +++ b/orte/test/mpi/Makefile @@ -1,4 +1,4 @@ -PROGS = mpi_no_op hello hello_null hello_nodename +PROGS = mpi_no_op hello hello_nodename abort multi_abort all: $(PROGS) diff --git a/orte/test/mpi/abort.c b/orte/test/mpi/abort.c new file mode 100644 index 0000000000..bfbc4c931b --- /dev/null +++ b/orte/test/mpi/abort.c @@ -0,0 +1,25 @@ +/* -*- C -*- + * + * $HEADER$ + * + * Abort test: every rank says hello, then rank 1 calls MPI_Abort + */ + +#include <stdio.h> +#include "mpi.h" + +int main(int argc, char* argv[]) +{ + int rank, size; + + MPI_Init(&argc, &argv); + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &size); + + printf("Hello, World, I am %d of %d\n", rank, size); + + if (1 == rank) MPI_Abort(MPI_COMM_WORLD, 2); + + MPI_Finalize(); + return 0; +} diff --git a/orte/test/mpi/multi_abort.c b/orte/test/mpi/multi_abort.c new file mode 100644 index 0000000000..7bc006da63 --- /dev/null +++ b/orte/test/mpi/multi_abort.c @@ -0,0 +1,25 @@ +/* -*- C -*- + * + * $HEADER$ + * + * Multi-abort test: every rank except 0 calls MPI_Abort + */ + +#include <stdio.h> +#include "mpi.h" + +int main(int argc, char* argv[]) +{ + int rank, size; + + MPI_Init(&argc, &argv); + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &size); + + printf("Hello, World, I am %d of %d\n", rank, size); + + if (0 != rank) MPI_Abort(MPI_COMM_WORLD, 2); + + MPI_Finalize(); + return 0; +} diff --git a/orte/tools/console/orteconsole.h b/orte/tools/console/orteconsole.h index eca0877bff..bfc94d1bc0 100644 --- a/orte/tools/console/orteconsole.h +++ b/orte/tools/console/orteconsole.h @@ -19,6 +19,16 @@ #ifndef ORTECONSOLE_H #define ORTECONSOLE_H
+#include "orte_config.h" + +#include "opal/class/opal_list.h" +#include "opal/threads/mutex.h" +#include "opal/threads/condition.h" + +#include "orte/mca/odls/odls_types.h" + +#include "orte/tools/orted/orted.h" + #if defined(c_plusplus) || defined(__cplusplus) extern "C" { #endif diff --git a/orte/tools/orte-ps/orte-ps.c b/orte/tools/orte-ps/orte-ps.c index 5e82b9db01..23ea2c39bf 100644 --- a/orte/tools/orte-ps/orte-ps.c +++ b/orte/tools/orte-ps/orte-ps.c @@ -62,11 +62,11 @@ #include "orte/util/session_dir.h" #include "orte/util/universe_setup_file_io.h" #include "orte/mca/gpr/gpr.h" -#include "orte/mca/rmgr/base/base.h" +#include "orte/mca/rmgr/rmgr.h" #include "orte/mca/ras/ras.h" #include "orte/mca/ras/ras_types.h" #include "orte/mca/ras/base/base.h" -#include "orte/mca/ras/base/ras_base_node.h" +#include "orte/mca/ras/base/ras_private.h" #include "opal/runtime/opal.h" #include "orte/runtime/runtime.h" @@ -1079,9 +1079,9 @@ static int gather_job_info(orte_ps_universe_info_t* universe) { /* * Get the App Context(s) */ - orte_rmgr_base_get_app_context(job->id, - &job->app_context, - &job->num_app_context); + orte_rmgr.get_app_context(job->id, + &job->app_context, + &job->num_app_context); /* * Access the job segment */ diff --git a/orte/tools/orted/Makefile.am b/orte/tools/orted/Makefile.am index 87a66beada..210b45c138 100644 --- a/orte/tools/orted/Makefile.am +++ b/orte/tools/orted/Makefile.am @@ -27,7 +27,9 @@ endif libs = \ $(top_builddir)/orte/liborte.la -headers = orted.h +headers = \ + orted.h + orted_SOURCES = \ $(headers) \ orted.c diff --git a/orte/tools/orted/orted.c b/orte/tools/orted/orted.c index 598c93182c..bf42a452d9 100644 --- a/orte/tools/orted/orted.c +++ b/orte/tools/orted/orted.c @@ -49,6 +49,7 @@ #include "opal/util/trace.h" #include "orte/dss/dss.h" +#include "orte/class/orte_value_array.h" #include "orte/util/sys_info.h" #include "orte/util/proc_info.h" #include "orte/util/univ_info.h" @@ -63,6 +64,7 @@ #include "orte/mca/smr/smr.h" 
#include "orte/mca/rmgr/rmgr.h" #include "orte/mca/rmgr/base/base.h" +#include "orte/mca/odls/odls.h" #include "orte/runtime/runtime.h" @@ -72,16 +74,22 @@ extern char** environ; #endif /* !defined(__WINDOWS__) */ +/* + * Globals + */ orted_globals_t orted_globals; static struct opal_event term_handler; static struct opal_event int_handler; static void signal_callback(int fd, short flags, void *arg); -static void job_state_callback(orte_gpr_notify_data_t *data, void *cbdata); static void orte_daemon_recv(int status, orte_process_name_t* sender, - orte_buffer_t *buffer, orte_rml_tag_t tag, - void* cbdata); + orte_buffer_t *buffer, orte_rml_tag_t tag, + void* cbdata); +static void orte_daemon_recv_pls(int status, orte_process_name_t* sender, + orte_buffer_t *buffer, orte_rml_tag_t tag, + void* cbdata); +static void orted_local_cb_launcher(orte_gpr_notify_data_t *data, void *user_tag); /* * define the orted context table for obtaining parameters @@ -92,10 +100,14 @@ opal_cmd_line_init_t orte_cmd_line_opts[] = { &orted_globals.help, OPAL_CMD_LINE_TYPE_BOOL, "This help message" }, - { "orte", "debug", NULL, 'd', NULL, "debug", 0, - &orted_globals.debug, OPAL_CMD_LINE_TYPE_BOOL, - "Debug the OpenRTE" }, + { "orted", "spin", NULL, '\0', NULL, "spin", 0, /* no short name: 'd' is already taken by --debug below */ + &orted_globals.spin, OPAL_CMD_LINE_TYPE_BOOL, + "Have the orted spin until we can connect a debugger to it" }, + { "orte", "debug", NULL, 'd', NULL, "debug", 0, + &orted_globals.debug, OPAL_CMD_LINE_TYPE_BOOL, + "Debug the OpenRTE" }, + { "orte", "no_daemonize", NULL, '\0', NULL, "no-daemonize", 0, &orted_globals.no_daemonize, OPAL_CMD_LINE_TYPE_BOOL, "Don't daemonize into the background" }, @@ -181,6 +193,10 @@ int main(int argc, char *argv[]) char *log_path = NULL; char log_file[PATH_MAX]; char *jobidstring; + orte_gpr_value_t *value; + char *segment; + char *param; + int i = 0; /* spin-loop counter: incremented before any assignment, so it must start defined */ /* setup to check common command line options that just report and die */ memset(&orted_globals, 0, sizeof(orted_globals_t)); @@ -206,6 +222,12 @@
int main(int argc, char *argv[]) return 1; } + /* see if they want us to spin until they can connect a debugger to us */ + while (orted_globals.spin) { + i++; + if (1000 < i) i=0; + } + /* Okay, now on to serious business * First, ensure the process info structure in instantiated and initialized * and set the daemon flag to true @@ -256,6 +278,41 @@ int main(int argc, char *argv[]) } } + /* Protect the daemon from MCA params that select specific components. We want + * the daemon to be free to select the proxy components, so we have to ensure + * that we aren't picking up directives intended for HNPs. + */ + if(NULL == (param = mca_base_param_environ_variable("rds",NULL,NULL))) { + ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); + return ORTE_ERR_OUT_OF_RESOURCE; + } + opal_unsetenv(param, &environ); + free(param); + if(NULL == (param = mca_base_param_environ_variable("ras",NULL,NULL))) { + ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); + return ORTE_ERR_OUT_OF_RESOURCE; + } + opal_unsetenv(param, &environ); + free(param); + if(NULL == (param = mca_base_param_environ_variable("rmaps",NULL,NULL))) { + ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); + return ORTE_ERR_OUT_OF_RESOURCE; + } + opal_unsetenv(param, &environ); + free(param); + if(NULL == (param = mca_base_param_environ_variable("pls",NULL,NULL))) { + ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); + return ORTE_ERR_OUT_OF_RESOURCE; + } + opal_unsetenv(param, &environ); + free(param); + if(NULL == (param = mca_base_param_environ_variable("rmgr",NULL,NULL))) { + ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); + return ORTE_ERR_OUT_OF_RESOURCE; + } + opal_unsetenv(param, &environ); + free(param); + /* turn on debug if debug_file is requested so output will be generated */ if (orted_globals.debug_daemons_file) { orted_globals.debug_daemons = true; @@ -336,10 +393,22 @@ int main(int argc, char *argv[]) } } - /* setup the thread lock and condition variable */ + /* setup the thread lock and condition variables */ 
OBJ_CONSTRUCT(&orted_globals.mutex, opal_mutex_t); OBJ_CONSTRUCT(&orted_globals.condition, opal_condition_t); + /* register the daemon main receive functions */ + ret = orte_rml.recv_buffer_nb(ORTE_RML_NAME_ANY, ORTE_RML_TAG_PLS_ORTED, 0, orte_daemon_recv_pls, NULL); + if (ret != ORTE_SUCCESS && ret != ORTE_ERR_NOT_IMPLEMENTED) { + ORTE_ERROR_LOG(ret); + return ret; + } + ret = orte_rml.recv_buffer_nb(ORTE_RML_NAME_ANY, ORTE_RML_TAG_DAEMON, 0, orte_daemon_recv, NULL); + if (ret != ORTE_SUCCESS && ret != ORTE_ERR_NOT_IMPLEMENTED) { + ORTE_ERROR_LOG(ret); + return ret; + } + /* check to see if I'm a bootproxy */ if (orted_globals.bootproxy) { /* perform bootproxy-specific things */ if (orted_globals.mpi_call_yield > 0) { @@ -348,22 +417,75 @@ int main(int argc, char *argv[]) opal_setenv(var, "1", true, &environ); } - /* setup callback on jobid */ - ret = orte_rmgr_base_proc_stage_gate_subscribe(orted_globals.bootproxy, job_state_callback, NULL, ORTE_PROC_STATE_TERMINATION); - if(ORTE_SUCCESS != ret) { + /* attach a subscription to the orted standard trigger so I can get + * information on the processes I am to locally launch as soon as all + * the orteds for this job are started. + * + * Once the registry gets to 2.0, we will be able to setup the + * subscription so we only get our own launch info back. In the interim, + * we setup the subscription so that ALL launch info for this job + * is returned. We will then have to parse that message to get our + * own local launch info. + * + * Since we have chosen this approach, we can take advantage of the + * fact that the callback function will directly receive this data. + * By setting up that callback function to actually perform the launch + * based on the received data, all we have to do here is go into our + * conditioned wait until the job completes! + * + * Sometimes, life can be good! 
:-) + */ + + /** put all this registry stuff in a compound command to limit communications */ + if (ORTE_SUCCESS != (ret = orte_gpr.begin_compound_cmd())) { ORTE_ERROR_LOG(ret); return ret; } - if (ORTE_SUCCESS != (ret = orte_rmgr.launch(orted_globals.bootproxy))) { - /* cleanup session directory */ - orte_session_dir_cleanup(orted_globals.bootproxy); - /* Finalize the runtime - don't worry about error codes at this point */ - orte_finalize(); - exit(ret); + /* let the local launcher setup a subscription for its required data. We + * pass the local_cb_launcher function so that this gets called back - this + * allows us to wakeup the orted so it can exit cleanly if the callback + * generates an error + */ + if (ORTE_SUCCESS != (ret = orte_odls.subscribe_launch_data(orted_globals.bootproxy, orted_local_cb_launcher))) { + ORTE_ERROR_LOG(ret); + return ret; } - /* setup and enter the event monitor */ + /* get the job segment name */ + if (ORTE_SUCCESS != (ret = orte_schema.get_job_segment_name(&segment, orted_globals.bootproxy))) { + ORTE_ERROR_LOG(ret); + return ret; + } + + /** increment the orted stage gate counter */ + if (ORTE_SUCCESS != (ret = orte_gpr.create_value(&value, ORTE_GPR_KEYS_OR|ORTE_GPR_TOKENS_AND, + segment, 1, 1))) { + ORTE_ERROR_LOG(ret); + return ret; + } + free(segment); /* done with this now */ + + value->tokens[0] = strdup(ORTE_JOB_GLOBALS); + if (ORTE_SUCCESS != (ret = orte_gpr.create_keyval(&(value->keyvals[0]), ORTED_LAUNCH_STAGE_GATE_CNTR, ORTE_UNDEF, NULL))) { + ORTE_ERROR_LOG(ret); + return ret; + } + + /* do the increment */ + if (ORTE_SUCCESS != (ret = orte_gpr.increment_value(value))) { + ORTE_ERROR_LOG(ret); + return ret; + } + OBJ_RELEASE(value); /* done with this now */ + + /** send the compound command */ + if (ORTE_SUCCESS != (ret = orte_gpr.exec_compound_cmd())) { + ORTE_ERROR_LOG(ret); + return ret; + } + + /* setup and enter the event monitor to wait for a wakeup call */ OPAL_THREAD_LOCK(&orted_globals.mutex); while (false == 
orted_globals.exit_condition) { opal_condition_wait(&orted_globals.condition, &orted_globals.mutex); @@ -381,7 +503,7 @@ int main(int argc, char *argv[]) } /* - * Set my process status to "starting". Note that this must be done + * Set my process status to "running". Note that this must be done * after the rte init is completed. */ if (ORTE_SUCCESS != (ret = orte_smr.set_proc_state(orte_process_info.my_name, @@ -394,13 +516,6 @@ int main(int argc, char *argv[]) opal_output(0, "[%lu,%lu,%lu] ompid: issuing callback", ORTE_NAME_ARGS(orte_process_info.my_name)); } - /* register the daemon main callback function */ - ret = orte_rml.recv_buffer_nb(ORTE_RML_NAME_ANY, ORTE_RML_TAG_DAEMON, 0, orte_daemon_recv, NULL); - if (ret != ORTE_SUCCESS && ret != ORTE_ERR_NOT_IMPLEMENTED) { - ORTE_ERROR_LOG(ret); - return ret; - } - /* go through the universe fields and see what else I need to do * - could be setup a virtual machine, spawn a console, etc. */ @@ -437,6 +552,30 @@ int main(int argc, char *argv[]) exit(0); } +/* this function receives the trigger callback from the orted launch stage gate + * and passes it to the orted local launcher for processing. 
We do this intermediate + * step so that we can get an error code if anything went wrong and, if so, wakeup the + * orted so we can gracefully die + */ +static void orted_local_cb_launcher(orte_gpr_notify_data_t *data, void *user_tag) +{ + int rc; + + /* pass the data to the orted_local_launcher and get a report on + * success or failure of the launch + */ + if (ORTE_SUCCESS != (rc = orte_odls.launch_local_procs(data))) { + /* if there was an error, report it and wakeup the orted */ + ORTE_ERROR_LOG(rc); + orted_globals.exit_condition = true; + opal_condition_signal(&orted_globals.condition); + } + + /* all done - return and let the orted sleep until something happens */ + return; +} + + static void signal_callback(int fd, short flags, void *arg) { OPAL_TRACE(1); @@ -444,7 +583,7 @@ static void signal_callback(int fd, short flags, void *arg) opal_condition_signal(&orted_globals.condition); } -static void orte_daemon_recv(int status, orte_process_name_t* sender, +static void orte_daemon_recv_pls(int status, orte_process_name_t* sender, orte_buffer_t *buffer, orte_rml_tag_t tag, void* cbdata) { @@ -452,7 +591,9 @@ static void orte_daemon_recv(int status, orte_process_name_t* sender, orte_daemon_cmd_flag_t command; int ret; orte_std_cntr_t n; - char *contact_info; + int32_t signal; + orte_gpr_notify_data_t *ndat; + orte_jobid_t job; OPAL_TRACE(1); @@ -462,70 +603,114 @@ static void orte_daemon_recv(int status, orte_process_name_t* sender, opal_output(0, "[%lu,%lu,%lu] ompid: received message", ORTE_NAME_ARGS(orte_process_info.my_name)); } + /* unpack the command */ + n = 1; + if (ORTE_SUCCESS != (ret = orte_dss.unpack(buffer, &command, &n, ORTE_DAEMON_CMD))) { + ORTE_ERROR_LOG(ret); + goto DONE; + } + + /* setup the answer */ + answer = OBJ_NEW(orte_buffer_t); if (NULL == answer) { ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); goto DONE; } - n = 1; - if (ORTE_SUCCESS != (ret = orte_dss.unpack(buffer, &command, &n, ORTE_DAEMON_CMD))) { + /* pack the command to ensure we 
always have something to send back, and + * so that the caller can verify communication + */ + if (ORTE_SUCCESS != (ret = orte_dss.pack(answer, &command, 1, ORTE_DAEMON_CMD))) { ORTE_ERROR_LOG(ret); goto CLEANUP; } + + switch(command) { - /**** EXIT COMMAND ****/ - if (ORTE_DAEMON_EXIT_CMD == command) { - orted_globals.exit_condition = true; - opal_condition_signal(&orted_globals.condition); + /**** KILL_LOCAL_PROCS ****/ + case ORTE_DAEMON_KILL_LOCAL_PROCS: + /* unpack the jobid - could be JOBID_WILDCARD, which would indicate + * we should kill all local procs. Otherwise, only kill those within + * the specified jobid + */ + n = 1; + if (ORTE_SUCCESS != (ret = orte_dss.unpack(buffer, &job, &n, ORTE_JOBID))) { + ORTE_ERROR_LOG(ret); + goto DONE; + } - goto CLEANUP; + if (ORTE_SUCCESS != (ret = orte_odls.kill_local_procs(job, true))) { + ORTE_ERROR_LOG(ret); + } + break; + + /**** SIGNAL_LOCAL_PROCS ****/ + case ORTE_DAEMON_SIGNAL_LOCAL_PROCS: + /* get the signal number to deliver */ + n = 1; + if (ORTE_SUCCESS != (ret = orte_dss.unpack(buffer, &signal, &n, ORTE_INT32))) { + ORTE_ERROR_LOG(ret); + goto DONE; + } + + /* see if they specified a process to signal, or if we + * should just signal them all + * + * NOTE: FOR NOW, WE JUST SIGNAL ALL CHILDREN + */ - /**** CONTACT QUERY COMMAND ****/ - } else if (ORTE_DAEMON_CONTACT_QUERY_CMD == command) { - /* send back contact info */ - contact_info = orte_rml.get_uri(); + if (ORTE_SUCCESS != (ret = orte_odls.signal_local_procs(NULL, signal))) { + ORTE_ERROR_LOG(ret); + } + break; - if (NULL == contact_info) { - ORTE_ERROR_LOG(ORTE_ERROR); - goto CLEANUP; - } + /**** ADD_LOCAL_PROCS ****/ + case ORTE_DAEMON_ADD_LOCAL_PROCS: + /* unpack the notify data object - NOTE(review): unlike the sibling cases, n is not reset to 1 here; presumably it still holds the count from the command unpack - confirm */ + if (ORTE_SUCCESS != (ret = orte_dss.unpack(buffer, &ndat, &n, ORTE_GPR_NOTIFY_DATA))) { + ORTE_ERROR_LOG(ret); + goto DONE; + } + + /* launch the processes described by the notify data */ + if (ORTE_SUCCESS != (ret = orte_odls.launch_local_procs(ndat))) { + ORTE_ERROR_LOG(ret); + } - if (ORTE_SUCCESS != (ret
= orte_dss.pack(answer, &contact_info, 1, ORTE_STRING))) { - ORTE_ERROR_LOG(ret); - goto CLEANUP; - } + /* cleanup the memory */ + OBJ_RELEASE(ndat); + break; + + /**** EXIT COMMAND ****/ + case ORTE_DAEMON_EXIT_CMD: + /* send the response before we wakeup because otherwise + * we'll depart before it gets out! + */ + if (0 > orte_rml.send_buffer(sender, answer, ORTE_RML_TAG_PLS_ORTED, 0)) { + ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); + } + orted_globals.exit_condition = true; + opal_condition_signal(&orted_globals.condition); + break; - if (0 > orte_rml.send_buffer(sender, answer, tag, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - } - - goto CLEANUP; - - /**** HOSTFILE COMMAND ****/ - } else if (ORTE_DAEMON_HOSTFILE_CMD == command) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_IMPLEMENTED); - goto CLEANUP; - - /**** SCRIPTFILE COMMAND ****/ - } else if (ORTE_DAEMON_SCRIPTFILE_CMD == command) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_IMPLEMENTED); - goto CLEANUP; - - /**** HEARTBEAT COMMAND ****/ - } else if (ORTE_DAEMON_HEARTBEAT_CMD == command) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_IMPLEMENTED); - goto CLEANUP; + default: + ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); + break; } +DONE: + /* send the response */ + if (0 > orte_rml.send_buffer(sender, answer, ORTE_RML_TAG_PLS_ORTED, 0)) { + ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); + } + CLEANUP: OBJ_RELEASE(answer); - - DONE: OPAL_THREAD_UNLOCK(&orted_globals.mutex); /* reissue the non-blocking receive */ - ret = orte_rml.recv_buffer_nb(ORTE_RML_NAME_ANY, ORTE_RML_TAG_DAEMON, 0, orte_daemon_recv, NULL); + ret = orte_rml.recv_buffer_nb(ORTE_RML_NAME_ANY, ORTE_RML_TAG_PLS_ORTED, 0, orte_daemon_recv_pls, NULL); if (ret != ORTE_SUCCESS && ret != ORTE_ERR_NOT_IMPLEMENTED) { ORTE_ERROR_LOG(ret); } @@ -533,81 +718,92 @@ static void orte_daemon_recv(int status, orte_process_name_t* sender, return; } -/* Function callback on jobid state changes. 
- * This is closely modeled after orte_rmgr_proxy_callback in rmgr_proxy.c - */ -void job_state_callback(orte_gpr_notify_data_t *data, void *cbdata) +static void orte_daemon_recv(int status, orte_process_name_t* sender, + orte_buffer_t *buffer, orte_rml_tag_t tag, + void* cbdata) { - orte_gpr_value_t **values, *value; - orte_gpr_keyval_t** keyvals; - orte_jobid_t jobid; - orte_std_cntr_t i, j, k; - int rc; - + orte_buffer_t *answer; + orte_daemon_cmd_flag_t command; + int ret; + orte_std_cntr_t n; + char *contact_info; + OPAL_TRACE(1); - - /* we made sure in the subscriptions that at least one - * value is always returned - * get the jobid from the segment name in the first value - */ - values = (orte_gpr_value_t**)(data->values)->addr; - if (ORTE_SUCCESS != (rc = - orte_schema.extract_jobid_from_segment_name(&jobid, - values[0]->segment))) { - ORTE_ERROR_LOG(rc); - return; + + OPAL_THREAD_LOCK(&orted_globals.mutex); + + if (orted_globals.debug_daemons) { + opal_output(0, "[%lu,%lu,%lu] ompid: received message", ORTE_NAME_ARGS(orte_process_info.my_name)); } - - for (i = 0, k = 0; k < (data->cnt) && i < (data->values)->size; ++i) { - - if (NULL != values[i]) { - k++; - value = values[i]; - - /* determine the state change */ - keyvals = value->keyvals; - for (j = 0; j < value->cnt; ++j) { - orte_gpr_keyval_t* keyval = keyvals[j]; - - if(strcmp(keyval->key, ORTE_PROC_NUM_TERMINATED) == 0) { - OPAL_THREAD_LOCK(&orted_globals.mutex); - - if (orted_globals.debug) { - opal_output(0, "orted: job_state_callback(jobid = %d, state = ORTE_PROC_STATE_TERMINATED)\n", - jobid); - } - - orted_globals.exit_condition = true; - opal_condition_signal(&orted_globals.condition); - - OPAL_THREAD_UNLOCK(&orted_globals.mutex); - continue; - } - - else if(strcmp(keyval->key, ORTE_PROC_NUM_ABORTED) == 0) { - OPAL_THREAD_LOCK(&orted_globals.mutex); - - if (orted_globals.debug) { - opal_output(0, "orted: job_state_callback(jobid = %d, state = ORTE_PROC_STATE_ABORTED)\n", - jobid); - } - - 
orted_globals.exit_condition = true; - opal_condition_signal(&orted_globals.condition); - - OPAL_THREAD_UNLOCK(&orted_globals.mutex); - continue; - } - else { - if (orted_globals.debug) { - opal_output(0, "orted: job_state_callback(jobid = %d, state = %d)\n", - jobid, keyval->key); - } - } - } + + answer = OBJ_NEW(orte_buffer_t); + if (NULL == answer) { + ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); + goto DONE; + } + + n = 1; + if (ORTE_SUCCESS != (ret = orte_dss.unpack(buffer, &command, &n, ORTE_DAEMON_CMD))) { + ORTE_ERROR_LOG(ret); + goto CLEANUP; + } + + /**** EXIT COMMAND ****/ + if (ORTE_DAEMON_EXIT_CMD == command) { + orted_globals.exit_condition = true; + opal_condition_signal(&orted_globals.condition); + + goto CLEANUP; + + /**** CONTACT QUERY COMMAND ****/ + } else if (ORTE_DAEMON_CONTACT_QUERY_CMD == command) { + /* send back contact info */ + contact_info = orte_rml.get_uri(); + + if (NULL == contact_info) { + ORTE_ERROR_LOG(ORTE_ERROR); + goto CLEANUP; } + + if (ORTE_SUCCESS != (ret = orte_dss.pack(answer, &contact_info, 1, ORTE_STRING))) { + ORTE_ERROR_LOG(ret); + goto CLEANUP; + } + + if (0 > orte_rml.send_buffer(sender, answer, tag, 0)) { + ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); + } + + goto CLEANUP; + + /**** HOSTFILE COMMAND ****/ + } else if (ORTE_DAEMON_HOSTFILE_CMD == command) { + ORTE_ERROR_LOG(ORTE_ERR_NOT_IMPLEMENTED); + goto CLEANUP; + + /**** SCRIPTFILE COMMAND ****/ + } else if (ORTE_DAEMON_SCRIPTFILE_CMD == command) { + ORTE_ERROR_LOG(ORTE_ERR_NOT_IMPLEMENTED); + goto CLEANUP; + + /**** HEARTBEAT COMMAND ****/ + } else if (ORTE_DAEMON_HEARTBEAT_CMD == command) { + ORTE_ERROR_LOG(ORTE_ERR_NOT_IMPLEMENTED); + goto CLEANUP; } - + +CLEANUP: + OBJ_RELEASE(answer); + +DONE: + OPAL_THREAD_UNLOCK(&orted_globals.mutex); + + /* reissue the non-blocking receive */ + ret = orte_rml.recv_buffer_nb(ORTE_RML_NAME_ANY, ORTE_RML_TAG_DAEMON, 0, orte_daemon_recv, NULL); + if (ret != ORTE_SUCCESS && ret != ORTE_ERR_NOT_IMPLEMENTED) { + ORTE_ERROR_LOG(ret); + 
} + return; } diff --git a/orte/tools/orted/orted.h b/orte/tools/orted/orted.h index 5b1afaba00..d77b182431 100644 --- a/orte/tools/orted/orted.h +++ b/orte/tools/orted/orted.h @@ -20,34 +20,15 @@ #define ORTED_H #include "orte_config.h" +#include "orte/orte_types.h" -#include - -#include "opal/class/opal_list.h" #include "opal/threads/mutex.h" #include "opal/threads/condition.h" - -#include "opal/util/cmd_line.h" -#include "opal/mca/mca.h" - +# #if defined(c_plusplus) || defined(__cplusplus) extern "C" { #endif -/* - * Definitions needed for communication - */ -#define ORTE_DAEMON_HOSTFILE_CMD 0x01 -#define ORTE_DAEMON_SCRIPTFILE_CMD 0x02 -#define ORTE_DAEMON_CONTACT_QUERY_CMD 0x03 -#define ORTE_DAEMON_HEARTBEAT_CMD 0xfe -#define ORTE_DAEMON_EXIT_CMD 0xff - - -/* - * Globals - */ - typedef struct { bool help; bool no_daemonize; @@ -64,11 +45,12 @@ typedef struct { opal_mutex_t mutex; opal_condition_t condition; bool exit_condition; + bool spin; int mpi_call_yield; + int reap; } orted_globals_t; -extern orted_globals_t orted_globals; - + /* * Version-related strings and functions */ diff --git a/orte/tools/orteprobe/orteprobe.c b/orte/tools/orteprobe/orteprobe.c index b17159c2cc..09576826c6 100644 --- a/orte/tools/orteprobe/orteprobe.c +++ b/orte/tools/orteprobe/orteprobe.c @@ -61,6 +61,7 @@ #include "orte/util/universe_setup_file_io.h" #include "orte/mca/rml/base/base.h" #include "orte/mca/rml/rml.h" +#include "orte/mca/errmgr/errmgr.h" #include "orte/mca/errmgr/base/base.h" #include "orte/mca/ns/base/base.h" #include "orte/mca/gpr/base/base.h" @@ -313,13 +314,13 @@ int main(int argc, char *argv[]) if (ORTE_SUCCESS != (ret = orte_dss.pack(&buffer, &orted_uri_ptr, 1, ORTE_STRING))) { fprintf(stderr, "orteprobe: failed to pack contact info for existing universe\n"); - orte_abort(1, NULL); + orte_errmgr.error_detected(1, NULL); } if (0 > orte_rml.send_buffer(&requestor, &buffer, ORTE_RML_TAG_PROBE, 0)) { fprintf(stderr, "orteprobe: comm failure when sending contact 
info for existing univ back to requestor\n"); OBJ_DESTRUCT(&buffer); - orte_abort(1, NULL); + orte_errmgr.error_detected(1, NULL); } OBJ_DESTRUCT(&buffer); @@ -346,7 +347,7 @@ int main(int argc, char *argv[]) if (0 > asprintf(&orte_universe_info.name, "%s-%d", universe, pid)) { fprintf(stderr, "orteprobe: failed to create unique universe name"); - orte_abort(1, NULL); + orte_errmgr.error_detected(1, NULL); } } @@ -354,7 +355,7 @@ int main(int argc, char *argv[]) /* setup the pipe to get the contact info back */ if (pipe(orted_pipe)) { fprintf (stderr, "orteprobe: Pipe failed\n"); - orte_abort(1, NULL); + orte_errmgr.error_detected(1, NULL); } /* get name of orted application - just in case user specified something different */ @@ -369,7 +370,7 @@ int main(int argc, char *argv[]) ortedargc = opal_argv_count(ortedargv); if (ortedargc <= 0) { fprintf(stderr, "orteprobe: could not initialize argv array for daemon\n"); - orte_abort(1, NULL); + orte_errmgr.error_detected(1, NULL); } /* setup the path */ @@ -406,11 +407,11 @@ int main(int argc, char *argv[]) Close read end first. */ execv(path, ortedargv); fprintf(stderr, "orteprobe: execv failed with errno=%d\n", errno); - orte_abort(1, NULL); + orte_errmgr.error_detected(1, NULL); } else if (pid < (pid_t) 0) { /* The fork failed. */ fprintf (stderr, "orteprobe: Fork failed\n"); - orte_abort(1, NULL); + orte_errmgr.error_detected(1, NULL); } else { /* This is the parent process. Close write end first. 
*/ @@ -428,13 +429,13 @@ int main(int argc, char *argv[]) if (ORTE_SUCCESS != (ret = orte_dss.pack(&buffer, &orted_uri_ptr[0], 1, ORTE_STRING))) { fprintf(stderr, "orteprobe: failed to pack daemon uri\n"); - orte_abort(1, NULL); + orte_errmgr.error_detected(1, NULL); } if (0 > orte_rml.send_buffer(&requestor, &buffer, ORTE_RML_TAG_PROBE, 0)) { fprintf(stderr, "orteprobe: could not send daemon uri info back to probe\n"); OBJ_DESTRUCT(&buffer); - orte_abort(1, NULL); + orte_errmgr.error_detected(1, NULL); } OBJ_DESTRUCT(&buffer); diff --git a/orte/tools/orterun/help-orterun.txt b/orte/tools/orterun/help-orterun.txt index 8857cf6081..936c177549 100644 --- a/orte/tools/orterun/help-orterun.txt +++ b/orte/tools/orterun/help-orterun.txt @@ -118,3 +118,7 @@ Things to check: - Ensure that the debugger is installed properly - Ensure that the "%s" executable is in your path - Ensure that any required licenses are available to run the debugger +# +[orterun:daemon-die] +%s was unable to cleanly terminate the daemons for this job. Returned value %d instead of ORTE_SUCCESS. 
+ diff --git a/orte/tools/orterun/orterun.c b/orte/tools/orterun/orterun.c index 09221431ac..e5c1a1822f 100644 --- a/orte/tools/orterun/orterun.c +++ b/orte/tools/orterun/orterun.c @@ -62,8 +62,10 @@ #include "orte/mca/ns/ns.h" #include "orte/mca/gpr/gpr.h" +#include "orte/mca/pls/pls.h" #include "orte/mca/rmgr/rmgr.h" #include "orte/mca/schema/schema.h" +#include "orte/mca/smr/smr.h" #include "orte/mca/errmgr/errmgr.h" #include "orte/runtime/runtime.h" @@ -275,6 +277,7 @@ static int parse_globals(int argc, char* argv[]); static int parse_locals(int argc, char* argv[]); static int parse_appfile(char *filename, char ***env); static void job_state_callback(orte_jobid_t jobid, orte_proc_state_t state); +static void dump_aborted_procs(orte_jobid_t jobid); int orterun(int argc, char *argv[]) @@ -283,6 +286,7 @@ int orterun(int argc, char *argv[]) int rc, i, num_apps, array_size, j; int id, iparam; orte_proc_state_t cb_states; + orte_job_state_t exit_state; /* Setup MCA params */ @@ -440,8 +444,8 @@ int orterun(int argc, char *argv[]) /* Spawn the job */ - cb_states = ORTE_PROC_STATE_ABORTED | ORTE_PROC_STATE_TERMINATED | ORTE_PROC_STATE_AT_STG1; - rc = orte_rmgr.spawn(apps, num_apps, &jobid, job_state_callback, cb_states); + cb_states = ORTE_PROC_STATE_TERMINATED | ORTE_PROC_STATE_AT_STG1; + rc = orte_rmgr.spawn_job(apps, num_apps, &jobid, job_state_callback, cb_states); if (ORTE_SUCCESS != rc) { /* JMS show_help */ opal_output(0, "%s: spawn failed with errno=%d\n", orterun_basename, rc); @@ -455,6 +459,27 @@ int orterun(int argc, char *argv[]) opal_condition_wait(&orterun_globals.cond, &orterun_globals.lock); } + /* check to see if the job was aborted */ + if (ORTE_SUCCESS != (rc = orte_smr.get_job_state(&exit_state, jobid))) { + ORTE_ERROR_LOG(rc); + /* define the exit state as abnormal by default */ + exit_state = ORTE_JOB_STATE_ABORTED; + } + if (ORTE_JOB_STATE_TERMINATED != exit_state) { + /* abnormal termination of some kind */ + dump_aborted_procs(jobid); + /* If 
we showed more abort messages than were allowed, + show a followup message here */ + if (num_aborted > max_display_aborted) { + i = num_aborted - max_display_aborted; + printf("%d additional process%s aborted (not shown)\n", + i, ((i > 1) ? "es" : "")); + } + if (num_killed > 0) { + printf("%d process%s killed (possibly by Open MPI)\n", + num_killed, ((num_killed > 1) ? "es" : "")); + } + } /* Make sure we propagate the exit code */ if (WIFEXITED(orterun_globals.exit_status)) { rc = WEXITSTATUS(orterun_globals.exit_status); @@ -465,19 +490,16 @@ int orterun(int argc, char *argv[]) * value for the shell */ rc = WTERMSIG(orterun_globals.exit_status) + 128; } + + /* the job is complete - tell the orteds that it is okay to + * finalize and exit. Keep the result in the scratch int i, not rc, + * so the exit code computed above is not overwritten */ + if (ORTE_SUCCESS != (i = orte_pls.terminate_orteds(jobid))) { + opal_show_help("help-orterun.txt", "orterun:daemon-die", false, + orterun_basename, i); + } OPAL_THREAD_UNLOCK(&orterun_globals.lock); - /* If we showed more abort messages than were allowed, - show a followup message here */ - if (num_aborted > max_display_aborted) { - i = num_aborted - max_display_aborted; - printf("%d additional process%s aborted (not shown)\n", - i, ((i > 1) ? "es" : "")); - } - if (num_killed > 0) { - printf("%d process%s killed (possibly by Open MPI)\n", - num_killed, ((num_killed > 1) ? "es" : "")); - } } } @@ -642,10 +664,6 @@ static void job_state_callback(orte_jobid_t jobid, orte_proc_state_t state) /* Note that there's only three states that we're interested in here: - ABORTED: which means that one or more processes have aborted - (terminated abnormally). In which case, we probably - want to print out some information. - TERMINATED: which means that all the processes in the job have completed (normally and/or abnormally).
@@ -662,11 +680,6 @@ static void job_state_callback(orte_jobid_t jobid, orte_proc_state_t state) } switch(state) { - case ORTE_PROC_STATE_ABORTED: - dump_aborted_procs(jobid); - orte_rmgr.terminate_job(jobid); - break; - case ORTE_PROC_STATE_TERMINATED: orterun_globals.exit_status = 0; /* set the exit status to indicate normal termination */ orterun_globals.exit = true; @@ -731,25 +744,24 @@ static void abort_signal_callback(int fd, short flags, void *arg) return; } if (!orterun_globals.quiet){ - fprintf(stderr, "%s: killing job...", orterun_basename); + fprintf(stderr, "%s: killing job...\n\n", orterun_basename); } + /* terminate the job - this will also wakeup orterun so + * it can kill all the orteds + */ if (jobid != ORTE_JOBID_MAX) { - ret = orte_rmgr.terminate_job(jobid); + ret = orte_pls.terminate_job(jobid); if (ORTE_SUCCESS != ret) { jobid = ORTE_JOBID_MAX; } } - + if (NULL != (event = (opal_event_t*)malloc(sizeof(opal_event_t)))) { opal_evtimer_set(event, exit_callback, NULL); opal_evtimer_add(event, &tv); } - /* make the output a little prettier - move the prompt to its own line */ - if (!orterun_globals.quiet){ - fprintf(stderr, "\n\n"); - } } @@ -771,7 +783,7 @@ static void signal_forward_callback(int fd, short event, void *arg) } /** send the signal out to the processes */ - if (ORTE_SUCCESS != (ret = orte_rmgr.signal_job(jobid, signum))) { + if (ORTE_SUCCESS != (ret = orte_pls.signal_job(jobid, signum))) { fprintf(stderr, "Signal %d could not be sent to the job (returned %d)", signum, ret); } diff --git a/orte/tools/orterun/totalview.c b/orte/tools/orterun/totalview.c index 11d14aea85..3a7fe793f2 100644 --- a/orte/tools/orterun/totalview.c +++ b/orte/tools/orterun/totalview.c @@ -65,7 +65,7 @@ extern char **environ; #include "opal/mca/base/base.h" #include "orte/mca/errmgr/errmgr.h" #include "orte/mca/rmgr/rmgr_types.h" -#include "orte/mca/rmaps/base/rmaps_base_map.h" +#include "orte/mca/rmaps/base/rmaps_private.h" #include "orte/runtime/runtime.h" 
#include "totalview.h" diff --git a/orte/util/proc_info.c b/orte/util/proc_info.c index df5a0ee831..97f5a34b1f 100644 --- a/orte/util/proc_info.c +++ b/orte/util/proc_info.c @@ -66,6 +66,12 @@ int orte_proc_info(void) id = mca_base_param_register_int("seed", NULL, NULL, NULL, orte_process_info.seed); mca_base_param_lookup_int(id, &tmp); orte_process_info.seed = OPAL_INT_TO_BOOL(tmp); + /* if we are a seed, then make sure the daemon flag is NOT set so that + * framework components are properly selected + */ + if (orte_process_info.seed) { + orte_process_info.daemon = false; + } id = mca_base_param_register_string("gpr", "replica", "uri", NULL, orte_process_info.gpr_replica_uri); mca_base_param_lookup_string(id, &(orte_process_info.gpr_replica_uri));