From cf6137b530e7478720fbf87da83d6a46c1774dbe Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Thu, 18 Jun 2015 09:53:20 -0700 Subject: [PATCH] Integrate PMIx 1.0 with OMPI. Bring Slurm PMI-1 component online Bring the s2 component online Little cleanup - let the various PMIx modules set the process name during init, and then just raise it up to the ORTE level. Required as the different PMI environments all pass the jobid in different ways. Bring the OMPI pubsub/pmi component online Get comm_spawn working again Ensure we always provide a cpuset, even if it is NULL pmix/cray: adjust cray pmix component for pmix Make changes so cray pmix can work within the integrated ompi/pmix framework. Bring singletons back online. Implement the comm_spawn operation using pmix - not tested yet Cleanup comm_spawn - procs now starting, error in connect_accept Complete integration --- .gitignore | 5 + ompi/Makefile.am | 2 + ompi/communicator/comm.c | 40 +- ompi/communicator/comm_cid.c | 4 +- ompi/communicator/comm_init.c | 5 +- ompi/dpm/Makefile.am | 18 + ompi/dpm/dpm.c | 1179 ++++++++++ ompi/dpm/dpm.h | 106 + ompi/mca/dpm/Makefile.am | 37 - ompi/mca/dpm/base/base.h | 101 - ompi/mca/dpm/base/dpm_base_common_fns.c | 289 --- ompi/mca/dpm/base/dpm_base_frame.c | 68 - ompi/mca/dpm/base/dpm_base_null_fns.c | 113 - ompi/mca/dpm/base/dpm_base_select.c | 61 - ompi/mca/dpm/base/owner.txt | 7 - ompi/mca/dpm/dpm.h | 233 -- ompi/mca/dpm/orte/Makefile.am | 43 - ompi/mca/dpm/orte/configure.m4 | 25 - ompi/mca/dpm/orte/dpm_orte.c | 1764 -------------- ompi/mca/dpm/orte/dpm_orte.h | 36 - ompi/mca/dpm/orte/dpm_orte_component.c | 67 - ompi/mca/dpm/orte/help-ompi-dpm-orte.txt | 43 - ompi/mca/dpm/orte/owner.txt | 7 - ompi/mca/mtl/mxm/mtl_mxm.c | 10 +- ompi/mca/mtl/ofi/mtl_ofi_compat.h | 6 +- ompi/mca/mtl/portals4/mtl_portals4.c | 2 +- .../mca/mtl/portals4/mtl_portals4_component.c | 2 +- ompi/mca/mtl/psm/mtl_psm.c | 4 +- ompi/mca/mtl/psm2/mtl_psm2.c | 4 +- ompi/mca/pml/base/pml_base_select.c | 4 +- 
ompi/mca/pml/yalla/pml_yalla.c | 4 +- ompi/mca/pubsub/Makefile.am | 37 - ompi/mca/pubsub/base/base.h | 54 - ompi/mca/pubsub/base/owner.txt | 7 - ompi/mca/pubsub/base/pubsub_base_frame.c | 64 - ompi/mca/pubsub/base/pubsub_base_null_fns.c | 38 - ompi/mca/pubsub/base/pubsub_base_select.c | 60 - ompi/mca/pubsub/orte/Makefile.am | 42 - ompi/mca/pubsub/orte/configure.m4 | 24 - .../mca/pubsub/orte/help-ompi-pubsub-orte.txt | 43 - ompi/mca/pubsub/orte/owner.txt | 7 - ompi/mca/pubsub/orte/pubsub_orte.c | 582 ----- ompi/mca/pubsub/orte/pubsub_orte.h | 51 - ompi/mca/pubsub/orte/pubsub_orte_component.c | 95 - ompi/mca/pubsub/pmi/Makefile.am | 40 - ompi/mca/pubsub/pmi/configure.m4 | 27 - ompi/mca/pubsub/pmi/owner.txt | 7 - ompi/mca/pubsub/pmi/pubsub_pmi.c | 128 - ompi/mca/pubsub/pmi/pubsub_pmi.h | 25 - ompi/mca/pubsub/pmi/pubsub_pmi_component.c | 90 - ompi/mca/pubsub/pubsub.h | 108 - ompi/mca/rte/orte/rte_orte.h | 4 +- ompi/mca/rte/orte/rte_orte_module.c | 3 +- .../topo_treematch_dist_graph_create.c | 13 +- .../pessimist/vprotocol_pessimist_eventlog.c | 57 +- ompi/mpi/c/close_port.c | 5 +- ompi/mpi/c/comm_accept.c | 7 +- ompi/mpi/c/comm_connect.c | 7 +- ompi/mpi/c/comm_disconnect.c | 5 +- ompi/mpi/c/comm_join.c | 51 +- ompi/mpi/c/comm_spawn.c | 11 +- ompi/mpi/c/comm_spawn_multiple.c | 11 +- ompi/mpi/c/lookup_name.c | 56 +- ompi/mpi/c/open_port.c | 5 +- ompi/mpi/c/publish_name.c | 76 +- ompi/mpi/c/unpublish_name.c | 50 +- ompi/proc/proc.c | 248 +- ompi/proc/proc.h | 13 +- ompi/runtime/ompi_mpi_finalize.c | 14 +- ompi/runtime/ompi_mpi_init.c | 50 +- opal/class/opal_hotel.h | 32 +- opal/dss/dss_load_unload.c | 10 +- opal/include/opal/constants.h | 3 +- opal/mca/btl/openib/btl_openib_component.c | 4 +- opal/mca/btl/openib/btl_openib_proc.c | 4 +- .../mca/btl/portals4/btl_portals4_component.c | 4 +- opal/mca/btl/scif/btl_scif_component.c | 4 +- opal/mca/btl/tcp/btl_tcp_component.c | 6 +- opal/mca/btl/tcp/btl_tcp_proc.c | 6 +- opal/mca/btl/ugni/btl_ugni_add_procs.c | 22 +- 
opal/mca/btl/usnic/btl_usnic_compat.c | 22 +- opal/mca/btl/vader/btl_vader_component.c | 4 +- opal/mca/btl/vader/btl_vader_module.c | 5 +- opal/mca/common/ugni/common_ugni.c | 4 +- opal/mca/common/ugni/common_ugni_ep.c | 4 +- opal/mca/dstore/Makefile.am | 32 - opal/mca/dstore/base/Makefile.am | 20 - opal/mca/dstore/base/base.h | 131 -- opal/mca/dstore/base/dstore_base_frame.c | 150 -- opal/mca/dstore/base/dstore_base_select.c | 105 - opal/mca/dstore/base/dstore_base_stubs.c | 276 --- opal/mca/dstore/base/help-dstore-base.txt | 19 - opal/mca/dstore/base/owner.txt | 7 - opal/mca/dstore/dstore.h | 222 -- opal/mca/dstore/dstore_types.h | 66 - opal/mca/dstore/hash/Makefile.am | 36 - opal/mca/dstore/hash/dstore_hash.h | 33 - opal/mca/dstore/hash/dstore_hash_component.c | 84 - opal/mca/dstore/hash/owner.txt | 7 - opal/mca/pmix/Makefile.am | 2 +- opal/mca/pmix/base/Makefile.am | 8 +- opal/mca/pmix/base/base.h | 8 + opal/mca/pmix/base/pmix_base_fns.c | 27 +- opal/mca/pmix/base/pmix_base_frame.c | 82 +- .../base/pmix_base_hash.c} | 177 +- opal/mca/pmix/base/pmix_base_hash.h | 37 + opal/mca/pmix/base/pmix_base_select.c | 18 +- opal/mca/pmix/cray/pmix_cray.c | 348 ++- opal/mca/pmix/cray/pmix_cray_component.c | 5 +- opal/mca/pmix/native/.opal_ignore | 0 opal/mca/pmix/native/Makefile.am | 36 - opal/mca/pmix/native/configure.m4 | 42 - opal/mca/pmix/native/pmix_native.h | 232 -- opal/mca/pmix/native/pmix_native_component.c | 139 -- opal/mca/pmix/native/usock.c | 471 ---- opal/mca/pmix/native/usock_sendrecv.c | 758 ------ opal/mca/pmix/pmix.h | 761 ++++-- opal/mca/pmix/pmix1xx/Makefile.am | 49 + opal/mca/pmix/pmix1xx/autogen.subdirs | 1 + opal/mca/pmix/pmix1xx/configure.m4 | 82 + opal/mca/pmix/pmix1xx/pmix/INSTALL | 88 + opal/mca/pmix/pmix1xx/pmix/LICENSE | 89 + opal/mca/pmix/pmix1xx/pmix/Makefile.am | 68 + opal/mca/pmix/pmix1xx/pmix/README | 395 ++++ opal/mca/pmix/pmix1xx/pmix/VERSION | 65 + opal/mca/pmix/pmix1xx/pmix/autogen.sh | 5 + opal/mca/pmix/pmix1xx/pmix/config/Makefile.am 
| 49 + .../pmix/pmix1xx/pmix/config/distscript.sh | 56 + opal/mca/pmix/pmix1xx/pmix/config/pmix.m4 | 759 ++++++ .../pmix/config/pmix_check_attributes.m4 | 591 +++++ .../pmix/config/pmix_check_broken_qsort.m4 | 55 + .../config/pmix_check_compiler_version.m4 | 99 + .../pmix1xx/pmix/config/pmix_check_ident.m4 | 97 + .../pmix1xx/pmix/config/pmix_check_munge.m4 | 83 + .../pmix1xx/pmix/config/pmix_check_package.m4 | 176 ++ .../pmix1xx/pmix/config/pmix_check_sasl.m4 | 80 + .../pmix1xx/pmix/config/pmix_check_vendor.m4 | 252 ++ .../pmix/config/pmix_check_visibility.m4 | 90 + .../config/pmix_ensure_contains_optflags.m4 | 74 + .../pmix1xx/pmix/config/pmix_functions.m4 | 540 +++++ .../pmix1xx/pmix/config/pmix_get_version.sh | 161 ++ .../pmix1xx/pmix/config/pmix_search_libs.m4 | 34 + .../pmix/pmix1xx/pmix/config/pmix_setup_cc.m4 | 309 +++ .../pmix1xx/pmix/config/pmix_setup_hwloc.m4 | 119 + .../pmix/config/pmix_setup_libevent.m4 | 129 ++ opal/mca/pmix/pmix1xx/pmix/configure.ac | 200 ++ .../pmix1xx/pmix/contrib/pmix-valgrind.supp | 48 + .../pmix/pmix1xx/pmix/examples/Makefile.am | 60 + opal/mca/pmix/pmix1xx/pmix/examples/README | 0 opal/mca/pmix/pmix1xx/pmix/examples/client.c | 158 ++ opal/mca/pmix/pmix1xx/pmix/examples/dmodex.c | 215 ++ opal/mca/pmix/pmix1xx/pmix/examples/dynamic.c | 192 ++ opal/mca/pmix/pmix1xx/pmix/examples/fault.c | 109 + opal/mca/pmix/pmix1xx/pmix/examples/pub.c | 161 ++ opal/mca/pmix/pmix1xx/pmix/examples/server.c | 627 +++++ .../mca/pmix/pmix1xx/pmix/include/Makefile.am | 42 + opal/mca/pmix/pmix1xx/pmix/include/pmi.h | 829 +++++++ opal/mca/pmix/pmix1xx/pmix/include/pmi2.h | 544 +++++ opal/mca/pmix/pmix1xx/pmix/include/pmix.h | 401 ++++ .../pmix/include/pmix/autogen/config.h.in | 191 ++ .../include/pmix/autogen/pmix_config_bottom.h | 432 ++++ .../include/pmix/autogen/pmix_config_top.h | 39 + .../pmix/include/pmix/pmix_common.h.in | 839 +++++++ .../pmix/pmix1xx/pmix/include/pmix/rename.h | 296 +++ .../pmix/pmix1xx/pmix/include/pmix_server.h | 453 ++++ 
.../pmix/pmix1xx/pmix/include/private/align.h | 29 + .../pmix/include/private/autogen/README.txt | 3 + .../pmix/include/private/hash_string.h | 69 + .../pmix/include/private/pmix_socket_errno.h | 26 + .../pmix/include/private/pmix_stdint.h | 325 +++ .../pmix1xx/pmix/include/private/prefetch.h | 37 + .../pmix/pmix1xx/pmix/include/private/types.h | 219 ++ .../pmix1xx/pmix/src/buffer_ops}/Makefile.am | 27 +- .../pmix1xx/pmix/src/buffer_ops/buffer_ops.h | 320 +++ .../pmix/pmix1xx/pmix/src/buffer_ops/copy.c | 432 ++++ .../pmix1xx/pmix/src/buffer_ops/internal.h | 471 ++++ .../pmix/src/buffer_ops/internal_functions.c | 121 + .../pmix1xx/pmix/src/buffer_ops/open_close.c | 599 +++++ .../pmix/pmix1xx/pmix/src/buffer_ops/pack.c | 853 +++++++ .../pmix/pmix1xx/pmix/src/buffer_ops/print.c | 869 +++++++ .../pmix/pmix1xx/pmix/src/buffer_ops/types.h | 105 + .../pmix/pmix1xx/pmix/src/buffer_ops/unpack.c | 1107 +++++++++ .../pmix/pmix1xx/pmix/src/class/Makefile.am | 35 + .../pmix1xx/pmix/src/class/pmix_hash_table.c | 606 +++++ .../pmix1xx/pmix/src/class/pmix_hash_table.h | 330 +++ .../pmix/pmix1xx/pmix/src/class/pmix_list.c | 260 +++ .../pmix/pmix1xx/pmix/src/class/pmix_list.h | 909 ++++++++ .../pmix/pmix1xx/pmix/src/class/pmix_object.c | 199 ++ .../pmix/pmix1xx/pmix/src/class/pmix_object.h | 501 ++++ .../pmix/src/class/pmix_pointer_array.c | 329 +++ .../pmix/src/class/pmix_pointer_array.h | 194 ++ .../pmix/pmix1xx/pmix/src/client/Makefile.am | 27 + opal/mca/pmix/pmix1xx/pmix/src/client/pmi1.c | 568 +++++ opal/mca/pmix/pmix1xx/pmix/src/client/pmi2.c | 525 +++++ .../pmix1xx/pmix/src/client/pmix_client.c | 1076 +++++++++ .../pmix/src/client/pmix_client_connect.c | 332 +++ .../pmix/src/client/pmix_client_fence.c | 252 ++ .../pmix1xx/pmix/src/client/pmix_client_get.c | 504 ++++ .../pmix1xx/pmix/src/client/pmix_client_ops.h | 33 + .../pmix1xx/pmix/src/client/pmix_client_pub.c | 510 ++++ .../pmix/src/client/pmix_client_spawn.c | 223 ++ .../pmix1xx/pmix/src/include}/Makefile.am | 18 +- 
.../pmix1xx/pmix/src/include/pmix_globals.c | 157 ++ .../pmix1xx/pmix/src/include/pmix_globals.h | 108 + .../mca/pmix/pmix1xx/pmix/src/sec/Makefile.am | 32 + .../pmix/pmix1xx/pmix/src/sec/pmix_munge.c | 145 ++ .../pmix/pmix1xx/pmix/src/sec/pmix_munge.h | 23 + .../pmix/pmix1xx/pmix/src/sec/pmix_native.c | 113 + .../pmix/pmix1xx/pmix/src/sec/pmix_native.h | 25 + .../mca/pmix/pmix1xx/pmix/src/sec/pmix_sasl.c | 95 + .../mca/pmix/pmix1xx/pmix/src/sec/pmix_sasl.h | 25 + opal/mca/pmix/pmix1xx/pmix/src/sec/pmix_sec.c | 170 ++ opal/mca/pmix/pmix1xx/pmix/src/sec/pmix_sec.h | 141 ++ .../pmix/pmix1xx/pmix/src/server/Makefile.am | 19 + .../pmix1xx/pmix/src/server/pmix_server.c | 2064 +++++++++++++++++ .../pmix/src/server/pmix_server_listener.c | 559 +++++ .../pmix1xx/pmix/src/server/pmix_server_ops.c | 1500 ++++++++++++ .../pmix1xx/pmix/src/server/pmix_server_ops.h | 242 ++ .../pmix/src/server/pmix_server_regex.c | 544 +++++ .../pmix/pmix1xx/pmix/src/usock/Makefile.am | 17 + opal/mca/pmix/pmix1xx/pmix/src/usock/usock.c | 320 +++ opal/mca/pmix/pmix1xx/pmix/src/usock/usock.h | 273 +++ .../pmix1xx/pmix/src/usock/usock_sendrecv.c | 460 ++++ .../pmix/pmix1xx/pmix/src/util/Makefile.am | 53 + opal/mca/pmix/pmix1xx/pmix/src/util/argv.c | 588 +++++ opal/mca/pmix/pmix1xx/pmix/src/util/argv.h | 302 +++ .../mca/pmix/pmix1xx/pmix/src/util/basename.c | 139 ++ .../mca/pmix/pmix1xx/pmix/src/util/basename.h | 114 + opal/mca/pmix/pmix1xx/pmix/src/util/crc.c | 1206 ++++++++++ opal/mca/pmix/pmix1xx/pmix/src/util/crc.h | 186 ++ opal/mca/pmix/pmix1xx/pmix/src/util/error.c | 141 ++ opal/mca/pmix/pmix1xx/pmix/src/util/error.h | 47 + opal/mca/pmix/pmix1xx/pmix/src/util/fd.c | 93 + opal/mca/pmix/pmix1xx/pmix/src/util/fd.h | 70 + opal/mca/pmix/pmix1xx/pmix/src/util/hash.c | 265 +++ opal/mca/pmix/pmix1xx/pmix/src/util/hash.h | 45 + .../pmix1xx/pmix/src/util/help-pmix-util.txt | 95 + opal/mca/pmix/pmix1xx/pmix/src/util/os_path.c | 106 + opal/mca/pmix/pmix1xx/pmix/src/util/os_path.h | 78 + 
opal/mca/pmix/pmix1xx/pmix/src/util/output.c | 964 ++++++++ opal/mca/pmix/pmix1xx/pmix/src/util/output.h | 561 +++++ .../pmix/pmix1xx/pmix/src/util/pmix_environ.c | 239 ++ .../pmix/pmix1xx/pmix/src/util/pmix_environ.h | 146 ++ opal/mca/pmix/pmix1xx/pmix/src/util/printf.c | 323 +++ opal/mca/pmix/pmix1xx/pmix/src/util/printf.h | 132 ++ .../pmix1xx/pmix/src/util/progress_threads.c | 128 + .../pmix1xx/pmix/src/util/progress_threads.h | 31 + opal/mca/pmix/pmix1xx/pmix/src/util/timings.c | 645 ++++++ opal/mca/pmix/pmix1xx/pmix/src/util/timings.h | 422 ++++ opal/mca/pmix/pmix1xx/pmix/test/Makefile.am | 59 + opal/mca/pmix/pmix1xx/pmix/test/README | 32 + opal/mca/pmix/pmix1xx/pmix/test/cli_stages.c | 253 ++ opal/mca/pmix/pmix1xx/pmix/test/cli_stages.h | 65 + opal/mca/pmix/pmix1xx/pmix/test/pmi2_client.c | 71 + opal/mca/pmix/pmix1xx/pmix/test/pmi_client.c | 76 + opal/mca/pmix/pmix1xx/pmix/test/pmix_client.c | 175 ++ opal/mca/pmix/pmix1xx/pmix/test/pmix_regex.c | 90 + opal/mca/pmix/pmix1xx/pmix/test/pmix_test.c | 189 ++ .../pmix/pmix1xx/pmix/test/server_callbacks.c | 299 +++ .../pmix/pmix1xx/pmix/test/server_callbacks.h | 54 + .../pmix/pmix1xx/pmix/test/simple/Makefile.am | 60 + .../pmix1xx/pmix/test/simple/simpclient.c | 160 ++ .../pmix1xx/pmix/test/simple/simpdmodex.c | 210 ++ .../pmix/pmix1xx/pmix/test/simple/simpdyn.c | 196 ++ .../pmix/pmix1xx/pmix/test/simple/simpft.c | 114 + .../pmix/pmix1xx/pmix/test/simple/simppub.c | 167 ++ .../pmix/pmix1xx/pmix/test/simple/simptest.c | 611 +++++ opal/mca/pmix/pmix1xx/pmix/test/test_cd.c | 88 + opal/mca/pmix/pmix1xx/pmix/test/test_cd.h | 17 + opal/mca/pmix/pmix1xx/pmix/test/test_common.c | 538 +++++ opal/mca/pmix/pmix1xx/pmix/test/test_common.h | 193 ++ opal/mca/pmix/pmix1xx/pmix/test/test_fence.c | 553 +++++ opal/mca/pmix/pmix1xx/pmix/test/test_fence.h | 18 + .../mca/pmix/pmix1xx/pmix/test/test_publish.c | 194 ++ .../mca/pmix/pmix1xx/pmix/test/test_publish.h | 16 + .../pmix1xx/pmix/test/test_resolve_peers.c | 123 + 
.../pmix1xx/pmix/test/test_resolve_peers.h | 16 + opal/mca/pmix/pmix1xx/pmix/test/test_spawn.c | 78 + opal/mca/pmix/pmix1xx/pmix/test/test_spawn.h | 16 + opal/mca/pmix/pmix1xx/pmix/test/utils.c | 240 ++ opal/mca/pmix/pmix1xx/pmix/test/utils.h | 20 + opal/mca/pmix/pmix1xx/pmix1.h | 162 ++ opal/mca/pmix/pmix1xx/pmix1_client.c | 958 ++++++++ opal/mca/pmix/pmix1xx/pmix1_server_north.c | 840 +++++++ opal/mca/pmix/pmix1xx/pmix1_server_south.c | 322 +++ opal/mca/pmix/pmix1xx/pmix_pmix1.c | 548 +++++ opal/mca/pmix/pmix1xx/pmix_pmix1_component.c | 100 + opal/mca/pmix/pmix_server.h | 210 ++ opal/mca/pmix/pmix_types.h | 296 +++ opal/mca/pmix/s1/pmix_s1.c | 471 ++-- opal/mca/pmix/s2/pmix_s2.c | 414 ++-- opal/mca/sec/base/base.h | 1 - opal/mca/sec/base/sec_base_stubs.c | 3 +- opal/mca/sec/basic/sec_basic.c | 39 +- opal/mca/sec/keystone/sec_keystone.c | 6 +- opal/mca/sec/munge/sec_munge.c | 8 +- opal/mca/sec/sec.h | 4 +- opal/util/alfg.h | 4 +- opal/util/net.c | 4 +- opal/util/proc.c | 28 +- opal/util/proc.h | 12 + orte/include/orte/types.h | 1 + orte/mca/dfs/app/dfs_app.c | 22 +- orte/mca/dfs/test/dfs_test.c | 43 +- .../errmgr/default_hnp/errmgr_default_hnp.c | 14 +- orte/mca/ess/base/ess_base_fns.c | 19 +- orte/mca/ess/base/ess_base_std_app.c | 30 +- orte/mca/ess/base/ess_base_std_orted.c | 18 +- orte/mca/ess/env/ess_env_module.c | 13 +- orte/mca/ess/hnp/ess_hnp_module.c | 18 +- orte/mca/ess/pmi/ess_pmi_component.c | 34 +- orte/mca/ess/pmi/ess_pmi_module.c | 347 ++- orte/mca/ess/singleton/ess_singleton_module.c | 607 +++-- orte/mca/grpcomm/base/grpcomm_base_stubs.c | 2 +- orte/mca/grpcomm/brks/grpcomm_brks.c | 44 +- orte/mca/grpcomm/direct/grpcomm_direct.c | 48 +- orte/mca/grpcomm/rcd/grpcomm_rcd.c | 46 +- orte/mca/odls/base/odls_base_default_fns.c | 15 + orte/mca/oob/base/oob_base_stubs.c | 13 +- orte/mca/oob/tcp/oob_tcp_connection.c | 6 +- orte/mca/oob/usock/oob_usock_connection.c | 2 - orte/mca/plm/alps/plm_alps_module.c | 2 + orte/mca/plm/base/plm_base_frame.c | 12 +- 
orte/mca/plm/base/plm_base_launch_support.c | 82 +- orte/mca/plm/base/plm_base_proxy.c | 167 +- orte/mca/plm/base/plm_base_receive.c | 16 +- orte/mca/rml/rml_types.h | 2 +- orte/orted/help-orted.txt | 20 +- orte/orted/orted_main.c | 21 +- orte/orted/pmix/Makefile.am | 13 +- orte/orted/pmix/pmix_server.c | 1189 +++------- orte/orted/pmix/pmix_server.h | 8 +- orte/orted/pmix/pmix_server_connection.c | 523 ----- orte/orted/pmix/pmix_server_db.c | 409 ---- orte/orted/pmix/pmix_server_dyn.c | 352 +++ orte/orted/pmix/pmix_server_fence.c | 243 ++ orte/orted/pmix/pmix_server_gen.c | 119 + orte/orted/pmix/pmix_server_internal.h | 290 +-- orte/orted/pmix/pmix_server_process_msgs.c | 838 ------- orte/orted/pmix/pmix_server_pub.c | 468 ++++ orte/orted/pmix/pmix_server_register_fns.c | 403 ++++ orte/orted/pmix/pmix_server_sendrecv.c | 832 ------- .../data_type_support/orte_dt_packing_fns.c | 9 +- .../data_type_support/orte_dt_unpacking_fns.c | 9 +- orte/runtime/orte_globals.c | 44 +- orte/runtime/orte_init.c | 55 +- orte/util/attr.c | 12 + orte/util/attr.h | 2 + orte/util/nidmap.c | 28 +- oshmem/mca/memheap/base/memheap_base_mkey.c | 3 +- test/class/opal_proc_table.c | 6 +- test/class/opal_tree.c | 2 + 352 files changed, 52392 insertions(+), 13152 deletions(-) create mode 100644 ompi/dpm/Makefile.am create mode 100644 ompi/dpm/dpm.c create mode 100644 ompi/dpm/dpm.h delete mode 100644 ompi/mca/dpm/Makefile.am delete mode 100644 ompi/mca/dpm/base/base.h delete mode 100644 ompi/mca/dpm/base/dpm_base_common_fns.c delete mode 100644 ompi/mca/dpm/base/dpm_base_frame.c delete mode 100644 ompi/mca/dpm/base/dpm_base_null_fns.c delete mode 100644 ompi/mca/dpm/base/dpm_base_select.c delete mode 100644 ompi/mca/dpm/base/owner.txt delete mode 100644 ompi/mca/dpm/dpm.h delete mode 100644 ompi/mca/dpm/orte/Makefile.am delete mode 100644 ompi/mca/dpm/orte/configure.m4 delete mode 100644 ompi/mca/dpm/orte/dpm_orte.c delete mode 100644 ompi/mca/dpm/orte/dpm_orte.h delete mode 100644 
ompi/mca/dpm/orte/dpm_orte_component.c delete mode 100644 ompi/mca/dpm/orte/help-ompi-dpm-orte.txt delete mode 100644 ompi/mca/dpm/orte/owner.txt delete mode 100644 ompi/mca/pubsub/Makefile.am delete mode 100644 ompi/mca/pubsub/base/base.h delete mode 100644 ompi/mca/pubsub/base/owner.txt delete mode 100644 ompi/mca/pubsub/base/pubsub_base_frame.c delete mode 100644 ompi/mca/pubsub/base/pubsub_base_null_fns.c delete mode 100644 ompi/mca/pubsub/base/pubsub_base_select.c delete mode 100644 ompi/mca/pubsub/orte/Makefile.am delete mode 100644 ompi/mca/pubsub/orte/configure.m4 delete mode 100644 ompi/mca/pubsub/orte/help-ompi-pubsub-orte.txt delete mode 100644 ompi/mca/pubsub/orte/owner.txt delete mode 100644 ompi/mca/pubsub/orte/pubsub_orte.c delete mode 100644 ompi/mca/pubsub/orte/pubsub_orte.h delete mode 100644 ompi/mca/pubsub/orte/pubsub_orte_component.c delete mode 100644 ompi/mca/pubsub/pmi/Makefile.am delete mode 100644 ompi/mca/pubsub/pmi/configure.m4 delete mode 100644 ompi/mca/pubsub/pmi/owner.txt delete mode 100644 ompi/mca/pubsub/pmi/pubsub_pmi.c delete mode 100644 ompi/mca/pubsub/pmi/pubsub_pmi.h delete mode 100644 ompi/mca/pubsub/pmi/pubsub_pmi_component.c delete mode 100644 ompi/mca/pubsub/pubsub.h delete mode 100644 opal/mca/dstore/Makefile.am delete mode 100644 opal/mca/dstore/base/Makefile.am delete mode 100644 opal/mca/dstore/base/base.h delete mode 100644 opal/mca/dstore/base/dstore_base_frame.c delete mode 100644 opal/mca/dstore/base/dstore_base_select.c delete mode 100644 opal/mca/dstore/base/dstore_base_stubs.c delete mode 100644 opal/mca/dstore/base/help-dstore-base.txt delete mode 100644 opal/mca/dstore/base/owner.txt delete mode 100644 opal/mca/dstore/dstore.h delete mode 100644 opal/mca/dstore/dstore_types.h delete mode 100644 opal/mca/dstore/hash/Makefile.am delete mode 100644 opal/mca/dstore/hash/dstore_hash.h delete mode 100644 opal/mca/dstore/hash/dstore_hash_component.c delete mode 100644 opal/mca/dstore/hash/owner.txt rename 
opal/mca/{dstore/hash/dstore_hash.c => pmix/base/pmix_base_hash.c} (63%) create mode 100644 opal/mca/pmix/base/pmix_base_hash.h create mode 100644 opal/mca/pmix/native/.opal_ignore delete mode 100644 opal/mca/pmix/native/Makefile.am delete mode 100644 opal/mca/pmix/native/configure.m4 delete mode 100644 opal/mca/pmix/native/pmix_native.h delete mode 100644 opal/mca/pmix/native/pmix_native_component.c delete mode 100644 opal/mca/pmix/native/usock.c delete mode 100644 opal/mca/pmix/native/usock_sendrecv.c create mode 100644 opal/mca/pmix/pmix1xx/Makefile.am create mode 100644 opal/mca/pmix/pmix1xx/autogen.subdirs create mode 100644 opal/mca/pmix/pmix1xx/configure.m4 create mode 100644 opal/mca/pmix/pmix1xx/pmix/INSTALL create mode 100644 opal/mca/pmix/pmix1xx/pmix/LICENSE create mode 100644 opal/mca/pmix/pmix1xx/pmix/Makefile.am create mode 100644 opal/mca/pmix/pmix1xx/pmix/README create mode 100644 opal/mca/pmix/pmix1xx/pmix/VERSION create mode 100755 opal/mca/pmix/pmix1xx/pmix/autogen.sh create mode 100644 opal/mca/pmix/pmix1xx/pmix/config/Makefile.am create mode 100755 opal/mca/pmix/pmix1xx/pmix/config/distscript.sh create mode 100644 opal/mca/pmix/pmix1xx/pmix/config/pmix.m4 create mode 100644 opal/mca/pmix/pmix1xx/pmix/config/pmix_check_attributes.m4 create mode 100644 opal/mca/pmix/pmix1xx/pmix/config/pmix_check_broken_qsort.m4 create mode 100644 opal/mca/pmix/pmix1xx/pmix/config/pmix_check_compiler_version.m4 create mode 100644 opal/mca/pmix/pmix1xx/pmix/config/pmix_check_ident.m4 create mode 100644 opal/mca/pmix/pmix1xx/pmix/config/pmix_check_munge.m4 create mode 100644 opal/mca/pmix/pmix1xx/pmix/config/pmix_check_package.m4 create mode 100644 opal/mca/pmix/pmix1xx/pmix/config/pmix_check_sasl.m4 create mode 100644 opal/mca/pmix/pmix1xx/pmix/config/pmix_check_vendor.m4 create mode 100644 opal/mca/pmix/pmix1xx/pmix/config/pmix_check_visibility.m4 create mode 100644 opal/mca/pmix/pmix1xx/pmix/config/pmix_ensure_contains_optflags.m4 create mode 100644 
opal/mca/pmix/pmix1xx/pmix/config/pmix_functions.m4 create mode 100755 opal/mca/pmix/pmix1xx/pmix/config/pmix_get_version.sh create mode 100644 opal/mca/pmix/pmix1xx/pmix/config/pmix_search_libs.m4 create mode 100644 opal/mca/pmix/pmix1xx/pmix/config/pmix_setup_cc.m4 create mode 100644 opal/mca/pmix/pmix1xx/pmix/config/pmix_setup_hwloc.m4 create mode 100644 opal/mca/pmix/pmix1xx/pmix/config/pmix_setup_libevent.m4 create mode 100644 opal/mca/pmix/pmix1xx/pmix/configure.ac create mode 100644 opal/mca/pmix/pmix1xx/pmix/contrib/pmix-valgrind.supp create mode 100644 opal/mca/pmix/pmix1xx/pmix/examples/Makefile.am create mode 100644 opal/mca/pmix/pmix1xx/pmix/examples/README create mode 100644 opal/mca/pmix/pmix1xx/pmix/examples/client.c create mode 100644 opal/mca/pmix/pmix1xx/pmix/examples/dmodex.c create mode 100644 opal/mca/pmix/pmix1xx/pmix/examples/dynamic.c create mode 100644 opal/mca/pmix/pmix1xx/pmix/examples/fault.c create mode 100644 opal/mca/pmix/pmix1xx/pmix/examples/pub.c create mode 100644 opal/mca/pmix/pmix1xx/pmix/examples/server.c create mode 100644 opal/mca/pmix/pmix1xx/pmix/include/Makefile.am create mode 100644 opal/mca/pmix/pmix1xx/pmix/include/pmi.h create mode 100644 opal/mca/pmix/pmix1xx/pmix/include/pmi2.h create mode 100644 opal/mca/pmix/pmix1xx/pmix/include/pmix.h create mode 100644 opal/mca/pmix/pmix1xx/pmix/include/pmix/autogen/config.h.in create mode 100644 opal/mca/pmix/pmix1xx/pmix/include/pmix/autogen/pmix_config_bottom.h create mode 100644 opal/mca/pmix/pmix1xx/pmix/include/pmix/autogen/pmix_config_top.h create mode 100644 opal/mca/pmix/pmix1xx/pmix/include/pmix/pmix_common.h.in create mode 100644 opal/mca/pmix/pmix1xx/pmix/include/pmix/rename.h create mode 100644 opal/mca/pmix/pmix1xx/pmix/include/pmix_server.h create mode 100644 opal/mca/pmix/pmix1xx/pmix/include/private/align.h create mode 100644 opal/mca/pmix/pmix1xx/pmix/include/private/autogen/README.txt create mode 100644 opal/mca/pmix/pmix1xx/pmix/include/private/hash_string.h 
create mode 100644 opal/mca/pmix/pmix1xx/pmix/include/private/pmix_socket_errno.h create mode 100644 opal/mca/pmix/pmix1xx/pmix/include/private/pmix_stdint.h create mode 100644 opal/mca/pmix/pmix1xx/pmix/include/private/prefetch.h create mode 100644 opal/mca/pmix/pmix1xx/pmix/include/private/types.h rename {ompi/mca/dpm/base => opal/mca/pmix/pmix1xx/pmix/src/buffer_ops}/Makefile.am (50%) create mode 100644 opal/mca/pmix/pmix1xx/pmix/src/buffer_ops/buffer_ops.h create mode 100644 opal/mca/pmix/pmix1xx/pmix/src/buffer_ops/copy.c create mode 100644 opal/mca/pmix/pmix1xx/pmix/src/buffer_ops/internal.h create mode 100644 opal/mca/pmix/pmix1xx/pmix/src/buffer_ops/internal_functions.c create mode 100644 opal/mca/pmix/pmix1xx/pmix/src/buffer_ops/open_close.c create mode 100644 opal/mca/pmix/pmix1xx/pmix/src/buffer_ops/pack.c create mode 100644 opal/mca/pmix/pmix1xx/pmix/src/buffer_ops/print.c create mode 100644 opal/mca/pmix/pmix1xx/pmix/src/buffer_ops/types.h create mode 100644 opal/mca/pmix/pmix1xx/pmix/src/buffer_ops/unpack.c create mode 100644 opal/mca/pmix/pmix1xx/pmix/src/class/Makefile.am create mode 100644 opal/mca/pmix/pmix1xx/pmix/src/class/pmix_hash_table.c create mode 100644 opal/mca/pmix/pmix1xx/pmix/src/class/pmix_hash_table.h create mode 100644 opal/mca/pmix/pmix1xx/pmix/src/class/pmix_list.c create mode 100644 opal/mca/pmix/pmix1xx/pmix/src/class/pmix_list.h create mode 100644 opal/mca/pmix/pmix1xx/pmix/src/class/pmix_object.c create mode 100644 opal/mca/pmix/pmix1xx/pmix/src/class/pmix_object.h create mode 100644 opal/mca/pmix/pmix1xx/pmix/src/class/pmix_pointer_array.c create mode 100644 opal/mca/pmix/pmix1xx/pmix/src/class/pmix_pointer_array.h create mode 100644 opal/mca/pmix/pmix1xx/pmix/src/client/Makefile.am create mode 100644 opal/mca/pmix/pmix1xx/pmix/src/client/pmi1.c create mode 100644 opal/mca/pmix/pmix1xx/pmix/src/client/pmi2.c create mode 100644 opal/mca/pmix/pmix1xx/pmix/src/client/pmix_client.c create mode 100644 
opal/mca/pmix/pmix1xx/pmix/src/client/pmix_client_connect.c create mode 100644 opal/mca/pmix/pmix1xx/pmix/src/client/pmix_client_fence.c create mode 100644 opal/mca/pmix/pmix1xx/pmix/src/client/pmix_client_get.c create mode 100644 opal/mca/pmix/pmix1xx/pmix/src/client/pmix_client_ops.h create mode 100644 opal/mca/pmix/pmix1xx/pmix/src/client/pmix_client_pub.c create mode 100644 opal/mca/pmix/pmix1xx/pmix/src/client/pmix_client_spawn.c rename {ompi/mca/pubsub/base => opal/mca/pmix/pmix1xx/pmix/src/include}/Makefile.am (64%) create mode 100644 opal/mca/pmix/pmix1xx/pmix/src/include/pmix_globals.c create mode 100644 opal/mca/pmix/pmix1xx/pmix/src/include/pmix_globals.h create mode 100644 opal/mca/pmix/pmix1xx/pmix/src/sec/Makefile.am create mode 100644 opal/mca/pmix/pmix1xx/pmix/src/sec/pmix_munge.c create mode 100644 opal/mca/pmix/pmix1xx/pmix/src/sec/pmix_munge.h create mode 100644 opal/mca/pmix/pmix1xx/pmix/src/sec/pmix_native.c create mode 100644 opal/mca/pmix/pmix1xx/pmix/src/sec/pmix_native.h create mode 100644 opal/mca/pmix/pmix1xx/pmix/src/sec/pmix_sasl.c create mode 100644 opal/mca/pmix/pmix1xx/pmix/src/sec/pmix_sasl.h create mode 100644 opal/mca/pmix/pmix1xx/pmix/src/sec/pmix_sec.c create mode 100644 opal/mca/pmix/pmix1xx/pmix/src/sec/pmix_sec.h create mode 100644 opal/mca/pmix/pmix1xx/pmix/src/server/Makefile.am create mode 100644 opal/mca/pmix/pmix1xx/pmix/src/server/pmix_server.c create mode 100644 opal/mca/pmix/pmix1xx/pmix/src/server/pmix_server_listener.c create mode 100644 opal/mca/pmix/pmix1xx/pmix/src/server/pmix_server_ops.c create mode 100644 opal/mca/pmix/pmix1xx/pmix/src/server/pmix_server_ops.h create mode 100644 opal/mca/pmix/pmix1xx/pmix/src/server/pmix_server_regex.c create mode 100644 opal/mca/pmix/pmix1xx/pmix/src/usock/Makefile.am create mode 100644 opal/mca/pmix/pmix1xx/pmix/src/usock/usock.c create mode 100644 opal/mca/pmix/pmix1xx/pmix/src/usock/usock.h create mode 100644 opal/mca/pmix/pmix1xx/pmix/src/usock/usock_sendrecv.c create 
mode 100644 opal/mca/pmix/pmix1xx/pmix/src/util/Makefile.am create mode 100644 opal/mca/pmix/pmix1xx/pmix/src/util/argv.c create mode 100644 opal/mca/pmix/pmix1xx/pmix/src/util/argv.h create mode 100644 opal/mca/pmix/pmix1xx/pmix/src/util/basename.c create mode 100644 opal/mca/pmix/pmix1xx/pmix/src/util/basename.h create mode 100644 opal/mca/pmix/pmix1xx/pmix/src/util/crc.c create mode 100644 opal/mca/pmix/pmix1xx/pmix/src/util/crc.h create mode 100644 opal/mca/pmix/pmix1xx/pmix/src/util/error.c create mode 100644 opal/mca/pmix/pmix1xx/pmix/src/util/error.h create mode 100644 opal/mca/pmix/pmix1xx/pmix/src/util/fd.c create mode 100644 opal/mca/pmix/pmix1xx/pmix/src/util/fd.h create mode 100644 opal/mca/pmix/pmix1xx/pmix/src/util/hash.c create mode 100644 opal/mca/pmix/pmix1xx/pmix/src/util/hash.h create mode 100644 opal/mca/pmix/pmix1xx/pmix/src/util/help-pmix-util.txt create mode 100644 opal/mca/pmix/pmix1xx/pmix/src/util/os_path.c create mode 100644 opal/mca/pmix/pmix1xx/pmix/src/util/os_path.h create mode 100644 opal/mca/pmix/pmix1xx/pmix/src/util/output.c create mode 100644 opal/mca/pmix/pmix1xx/pmix/src/util/output.h create mode 100644 opal/mca/pmix/pmix1xx/pmix/src/util/pmix_environ.c create mode 100644 opal/mca/pmix/pmix1xx/pmix/src/util/pmix_environ.h create mode 100644 opal/mca/pmix/pmix1xx/pmix/src/util/printf.c create mode 100644 opal/mca/pmix/pmix1xx/pmix/src/util/printf.h create mode 100644 opal/mca/pmix/pmix1xx/pmix/src/util/progress_threads.c create mode 100644 opal/mca/pmix/pmix1xx/pmix/src/util/progress_threads.h create mode 100644 opal/mca/pmix/pmix1xx/pmix/src/util/timings.c create mode 100644 opal/mca/pmix/pmix1xx/pmix/src/util/timings.h create mode 100644 opal/mca/pmix/pmix1xx/pmix/test/Makefile.am create mode 100644 opal/mca/pmix/pmix1xx/pmix/test/README create mode 100644 opal/mca/pmix/pmix1xx/pmix/test/cli_stages.c create mode 100644 opal/mca/pmix/pmix1xx/pmix/test/cli_stages.h create mode 100644 opal/mca/pmix/pmix1xx/pmix/test/pmi2_client.c 
create mode 100644 opal/mca/pmix/pmix1xx/pmix/test/pmi_client.c create mode 100644 opal/mca/pmix/pmix1xx/pmix/test/pmix_client.c create mode 100644 opal/mca/pmix/pmix1xx/pmix/test/pmix_regex.c create mode 100644 opal/mca/pmix/pmix1xx/pmix/test/pmix_test.c create mode 100644 opal/mca/pmix/pmix1xx/pmix/test/server_callbacks.c create mode 100644 opal/mca/pmix/pmix1xx/pmix/test/server_callbacks.h create mode 100644 opal/mca/pmix/pmix1xx/pmix/test/simple/Makefile.am create mode 100644 opal/mca/pmix/pmix1xx/pmix/test/simple/simpclient.c create mode 100644 opal/mca/pmix/pmix1xx/pmix/test/simple/simpdmodex.c create mode 100644 opal/mca/pmix/pmix1xx/pmix/test/simple/simpdyn.c create mode 100644 opal/mca/pmix/pmix1xx/pmix/test/simple/simpft.c create mode 100644 opal/mca/pmix/pmix1xx/pmix/test/simple/simppub.c create mode 100644 opal/mca/pmix/pmix1xx/pmix/test/simple/simptest.c create mode 100644 opal/mca/pmix/pmix1xx/pmix/test/test_cd.c create mode 100644 opal/mca/pmix/pmix1xx/pmix/test/test_cd.h create mode 100644 opal/mca/pmix/pmix1xx/pmix/test/test_common.c create mode 100644 opal/mca/pmix/pmix1xx/pmix/test/test_common.h create mode 100644 opal/mca/pmix/pmix1xx/pmix/test/test_fence.c create mode 100644 opal/mca/pmix/pmix1xx/pmix/test/test_fence.h create mode 100644 opal/mca/pmix/pmix1xx/pmix/test/test_publish.c create mode 100644 opal/mca/pmix/pmix1xx/pmix/test/test_publish.h create mode 100644 opal/mca/pmix/pmix1xx/pmix/test/test_resolve_peers.c create mode 100644 opal/mca/pmix/pmix1xx/pmix/test/test_resolve_peers.h create mode 100644 opal/mca/pmix/pmix1xx/pmix/test/test_spawn.c create mode 100644 opal/mca/pmix/pmix1xx/pmix/test/test_spawn.h create mode 100644 opal/mca/pmix/pmix1xx/pmix/test/utils.c create mode 100644 opal/mca/pmix/pmix1xx/pmix/test/utils.h create mode 100644 opal/mca/pmix/pmix1xx/pmix1.h create mode 100644 opal/mca/pmix/pmix1xx/pmix1_client.c create mode 100644 opal/mca/pmix/pmix1xx/pmix1_server_north.c create mode 100644 
opal/mca/pmix/pmix1xx/pmix1_server_south.c create mode 100644 opal/mca/pmix/pmix1xx/pmix_pmix1.c create mode 100644 opal/mca/pmix/pmix1xx/pmix_pmix1_component.c create mode 100644 opal/mca/pmix/pmix_server.h create mode 100644 opal/mca/pmix/pmix_types.h delete mode 100644 orte/orted/pmix/pmix_server_connection.c delete mode 100644 orte/orted/pmix/pmix_server_db.c create mode 100644 orte/orted/pmix/pmix_server_dyn.c create mode 100644 orte/orted/pmix/pmix_server_fence.c create mode 100644 orte/orted/pmix/pmix_server_gen.c delete mode 100644 orte/orted/pmix/pmix_server_process_msgs.c create mode 100644 orte/orted/pmix/pmix_server_pub.c create mode 100644 orte/orted/pmix/pmix_server_register_fns.c delete mode 100644 orte/orted/pmix/pmix_server_sendrecv.c diff --git a/.gitignore b/.gitignore index 1e993cc4e7..9dd15b9dd8 100644 --- a/.gitignore +++ b/.gitignore @@ -300,6 +300,11 @@ opal/mca/hwloc/hwloc*/hwloc/include/private/autogen/config.h opal/mca/installdirs/config/install_dirs.h +opal/mca/pmix/pmix1xx/pmix/include/pmix/autogen/config.h +opal/mca/pmix/pmix1xx/pmix/include/pmix/pmix_common.h +opal/mca/pmix/pmix1xx/pmix/include/private/autogen/config.h +opal/mca/pmix/pmix1xx/pmix/include/private/autogen/config.h.in + opal/tools/opal-checkpoint/opal-checkpoint opal/tools/opal-checkpoint/opal-checkpoint.1 opal/tools/opal-restart/opal-restart diff --git a/ompi/Makefile.am b/ompi/Makefile.am index a86926d80a..8d7b689fc0 100644 --- a/ompi/Makefile.am +++ b/ompi/Makefile.am @@ -14,6 +14,7 @@ # Copyright (c) 2010-2011 Sandia National Laboratories. All rights reserved. # Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights # reserved. +# Copyright (c) 2015 Intel, Inc. All rights reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow @@ -172,6 +173,7 @@ endif include class/Makefile.am include attribute/Makefile.am include communicator/Makefile.am +include dpm/Makefile.am include errhandler/Makefile.am include file/Makefile.am include group/Makefile.am diff --git a/ompi/communicator/comm.c b/ompi/communicator/comm.c index 5a1f8edaab..6e6db1a19e 100644 --- a/ompi/communicator/comm.c +++ b/ompi/communicator/comm.c @@ -20,7 +20,7 @@ * All rights reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -34,8 +34,8 @@ #include "ompi/constants.h" #include "opal/mca/hwloc/base/base.h" -#include "opal/mca/dstore/dstore.h" #include "opal/dss/dss.h" +#include "opal/mca/pmix/pmix.h" #include "ompi/proc/proc.h" #include "opal/threads/mutex.h" @@ -43,7 +43,7 @@ #include "opal/util/output.h" #include "ompi/mca/topo/topo.h" #include "ompi/mca/topo/base/base.h" -#include "ompi/mca/dpm/dpm.h" +#include "ompi/dpm/dpm.h" #include "ompi/attribute/attribute.h" #include "ompi/communicator/communicator.h" @@ -202,7 +202,7 @@ int ompi_comm_set_nb ( ompi_communicator_t **ncomm, Necessary for the disconnect of dynamic communicators. 
*/ if ( 0 < local_size && (OMPI_COMM_IS_INTRA(newcomm) || 0 c_local_group)) { rc = ompi_proc_pack(local_comm->c_local_group->grp_proc_pointers, - local_size, true, sbuf); + local_size, sbuf); } /* get the proc list for the sparse implementations */ else { @@ -1788,7 +1786,7 @@ ompi_proc_t **ompi_comm_get_rprocs ( ompi_communicator_t *local_comm, sizeof (ompi_proc_t *)); for(i=0 ; ic_local_group->grp_proc_count ; i++) proc_list[i] = ompi_group_peer_lookup(local_comm->c_local_group,i); - rc = ompi_proc_pack (proc_list, local_size, true, sbuf); + rc = ompi_proc_pack (proc_list, local_size, sbuf); } if ( OMPI_SUCCESS != rc ) { goto err_exit; @@ -1867,7 +1865,7 @@ ompi_proc_t **ompi_comm_get_rprocs ( ompi_communicator_t *local_comm, } /* decode the names into a proc-list */ - rc = ompi_proc_unpack(rbuf, rsize, &rprocs, true, NULL, NULL); + rc = ompi_proc_unpack(rbuf, rsize, &rprocs, NULL, NULL); OBJ_RELEASE(rbuf); if (OMPI_SUCCESS != rc) { OMPI_ERROR_LOG(rc); @@ -1876,22 +1874,16 @@ ompi_proc_t **ompi_comm_get_rprocs ( ompi_communicator_t *local_comm, /* set the locality of the remote procs */ for (i=0; i < rsize; i++) { - /* get the locality information - do not use modex recv for - * this request as that will automatically cause the hostname - * to be loaded as well. All RTEs are required to provide this - * information at startup for procs on our node. Thus, not - * finding the info indicates that the proc is non-local. 
- */ - OBJ_CONSTRUCT(&myvals, opal_list_t); - if (OMPI_SUCCESS != opal_dstore.fetch(opal_dstore_internal, - &rprocs[i]->super.proc_name, - OPAL_DSTORE_LOCALITY, &myvals)) { - rprocs[i]->super.proc_flags = OPAL_PROC_NON_LOCAL; + /* get the locality information - all RTEs are required + * to provide this information at startup */ + uint16_t *u16ptr, u16; + u16ptr = &u16; + OPAL_MODEX_RECV_VALUE(rc, OPAL_PMIX_LOCALITY, &rprocs[i]->super.proc_name, &u16ptr, OPAL_UINT16); + if (OPAL_SUCCESS == rc) { + rprocs[i]->super.proc_flags = u16; } else { - kv = (opal_value_t*)opal_list_get_first(&myvals); - rprocs[i]->super.proc_flags = kv->data.uint16; + rprocs[i]->super.proc_flags = OPAL_PROC_NON_LOCAL; } - OPAL_LIST_DESTRUCT(&myvals); } /* And now add the information into the database */ @@ -2210,7 +2202,7 @@ static int ompi_comm_fill_rest(ompi_communicator_t *comm, if( MPI_UNDEFINED != my_rank ) { /* verify whether to set the flag, that this comm contains process from more than one jobid. */ - ompi_dpm.mark_dyncomm (comm); + ompi_dpm_mark_dyncomm (comm); } /* set the error handler */ diff --git a/ompi/communicator/comm_cid.c b/ompi/communicator/comm_cid.c index 3ecc8beebc..59e812a5fd 100644 --- a/ompi/communicator/comm_cid.c +++ b/ompi/communicator/comm_cid.c @@ -164,7 +164,9 @@ int ompi_comm_cid_init (void) for ( i=0; isuper, (uint8_t**)&tlpointer, &size); + OPAL_MODEX_RECV_STRING(ret, "MPI_THREAD_LEVEL", + &thisproc->super.proc_name, + (uint8_t**)&tlpointer, &size); if (OMPI_SUCCESS == ret) { thread_level = *((uint8_t *) tlpointer); if ( OMPI_THREADLEVEL_IS_MULTIPLE (thread_level) ) { diff --git a/ompi/communicator/comm_init.c b/ompi/communicator/comm_init.c index 3cf3ebbc22..2b761d93df 100644 --- a/ompi/communicator/comm_init.c +++ b/ompi/communicator/comm_init.c @@ -20,6 +20,7 @@ * All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2015 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -39,7 +40,7 @@ #include "ompi/runtime/params.h" #include "ompi/communicator/communicator.h" #include "ompi/attribute/attribute.h" -#include "ompi/mca/dpm/dpm.h" +#include "ompi/dpm/dpm.h" #include "ompi/memchecker.h" /* @@ -236,7 +237,7 @@ int ompi_comm_finalize(void) OBJ_DESTRUCT( &ompi_mpi_comm_self ); /* disconnect all dynamic communicators */ - ompi_dpm.dyn_finalize(); + ompi_dpm_dyn_finalize(); /* Free the attributes on comm world. This is not done in the * destructor as we delete attributes in ompi_comm_free (which diff --git a/ompi/dpm/Makefile.am b/ompi/dpm/Makefile.am new file mode 100644 index 0000000000..43a8bbf14e --- /dev/null +++ b/ompi/dpm/Makefile.am @@ -0,0 +1,18 @@ +# -*- makefile -*- +# +# Copyright (c) 2015 Intel, Inc. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# This makefile.am does not stand on its own - it is included from ompi/Makefile.am + +headers += \ + dpm/dpm.h + +libmpi_la_SOURCES += \ + dpm/dpm.c + diff --git a/ompi/dpm/dpm.c b/ompi/dpm/dpm.c new file mode 100644 index 0000000000..3febefad23 --- /dev/null +++ b/ompi/dpm/dpm.c @@ -0,0 +1,1179 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2011 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2007-2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2006-2009 University of Houston. All rights reserved. + * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. 
+ * Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights + * reserved. + * Copyright (c) 2013-2015 Intel, Inc. All rights reserved + * Copyright (c) 2014-2015 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#include "ompi_config.h" +#include "ompi/constants.h" + +#include +#include +#include +#include +#if HAVE_SYS_TIME_H +#include +#endif + +#include "opal/util/alfg.h" +#include "opal/util/argv.h" +#include "opal/util/opal_getcwd.h" +#include "opal/util/proc.h" +#include "opal/dss/dss.h" +#include "opal/mca/hwloc/base/base.h" +#include "opal/mca/pmix/pmix.h" + +#include "ompi/communicator/communicator.h" +#include "ompi/group/group.h" +#include "ompi/proc/proc.h" +#include "ompi/mca/pml/pml.h" +#include "ompi/mca/rte/rte.h" +#include "ompi/info/info.h" + +#include "ompi/dpm/dpm.h" + +static opal_rng_buff_t rnd; + +typedef struct { + ompi_communicator_t *comm; + int size; + struct ompi_request_t **reqs; + int buf; +} ompi_dpm_disconnect_obj; +static int disconnect_waitall (int count, ompi_dpm_disconnect_obj **objs); +static ompi_dpm_disconnect_obj *disconnect_init(ompi_communicator_t *comm); + +typedef struct { + opal_list_item_t super; + ompi_proc_t *p; +} ompi_dpm_proct_caddy_t; +static OBJ_CLASS_INSTANCE(ompi_dpm_proct_caddy_t, + opal_list_item_t, + NULL, NULL); + +struct lookup_caddy_t { + bool active; + opal_pmix_pdata_t *pdat; +}; + +static void lookup_cbfunc(int status, opal_list_t *data, void *cbdata) +{ + struct lookup_caddy_t *cd = (struct lookup_caddy_t*)cbdata; + opal_pmix_pdata_t *p = (opal_pmix_pdata_t*)opal_list_get_first(data); + if (NULL != p && OPAL_STRING == p->value.type && + NULL != p->value.data.string) { + cd->pdat->value.data.string = strdup(p->value.data.string); + } + cd->active = false; +} + + +/* + * Init the module + */ +int ompi_dpm_init(void) +{ + time_t now; + + /* seed our random number 
generator */ + now = time(NULL); + if (!opal_srand(&rnd, now)) { + return OMPI_ERROR; + } + return OMPI_SUCCESS; +} + +int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root, + const char *port_string, bool send_first, + ompi_communicator_t **newcomm) +{ + int size, rsize, rank, rc; + char **members = NULL, *nstring; + bool dense, isnew; + opal_process_name_t pname; + opal_list_t ilist, mlist, rlist; + opal_pmix_info_t *info; + opal_pmix_pdata_t *pdat; + opal_namelist_t *nm; + + ompi_communicator_t *newcomp=MPI_COMM_NULL; + ompi_proc_t *proc; + ompi_group_t *group=comm->c_local_group; + ompi_proc_t **proc_list=NULL, **new_proc_list = NULL; + int32_t i,j; + ompi_group_t *new_group_pointer; + ompi_dpm_proct_caddy_t *cd; + + /* set default error return */ + *newcomm = MPI_COMM_NULL; + + size = ompi_comm_size ( comm ); + rank = ompi_comm_rank ( comm ); + + /* the "send_first" end will append ":connect" to the port name and publish + * the list of its participating procs on that key. The receiving root proc + * will append ":accept" to the port name and publish the list of its + * participants on that key. Each proc will then block waiting for lookup + * to complete on the other's key. Once that completes, the list of remote + * procs is used to complete construction of the intercommunicator. 
*/ + + /* everyone constructs the list of members from their communicator */ + if (MPI_COMM_WORLD == comm) { + pname.jobid = OMPI_PROC_MY_NAME->jobid; + pname.vpid = OPAL_VPID_WILDCARD; + rc = opal_convert_process_name_to_string(&nstring, &pname); + if (OPAL_SUCCESS != rc) { + return OMPI_ERROR; + } + opal_argv_append_nosize(&members, nstring); + free(nstring); + } else { + if (OMPI_GROUP_IS_DENSE(group)) { + proc_list = group->grp_proc_pointers; + dense = true; + } else { + proc_list = (ompi_proc_t**)calloc(group->grp_proc_count, + sizeof(ompi_proc_t *)); + for (i=0 ; igrp_proc_count ; i++) { + if (NULL == (proc_list[i] = ompi_group_peer_lookup(group,i))) { + ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); + rc = ORTE_ERR_NOT_FOUND; + goto exit; + } + } + dense = false; + } + for (i=0; i < size; i++) { + rc = opal_convert_process_name_to_string(&nstring, &(proc_list[i]->super.proc_name)); + if (OPAL_SUCCESS != rc) { + return OMPI_ERROR; + } + opal_argv_append_nosize(&members, nstring); + free(nstring); + } + if (!dense) { + free(proc_list); + } + } + + if (rank == root) { + /* put my name at the front of the list of members - my + * name will therefore be on the list twice, but the + * other side needs to know the root from this side */ + rc = opal_convert_process_name_to_string(&nstring, OMPI_PROC_MY_NAME); + if (OPAL_SUCCESS != rc) { + return OMPI_ERROR; + } + opal_argv_prepend_nosize(&members, nstring); + free(nstring); + /* the root for each side publishes their list of participants */ + OBJ_CONSTRUCT(&ilist, opal_list_t); + info = OBJ_NEW(opal_pmix_info_t); + opal_list_append(&ilist, &info->super); + + if (send_first) { + (void)asprintf(&info->key, "%s:connect", port_string); + info->value.type = OPAL_STRING; + info->value.data.string = opal_argv_join(members, ':'); + } else { + (void)asprintf(&info->key, "%s:accept", port_string); + info->value.type = OPAL_STRING; + info->value.data.string = opal_argv_join(members, ':'); + } + /* publish it with "session" scope */ + rc 
= opal_pmix.publish(OPAL_PMIX_SESSION, + OPAL_PMIX_PERSIST_APP, + &ilist); + OPAL_LIST_DESTRUCT(&ilist); + if (OPAL_SUCCESS != rc) { + opal_argv_free(members); + return OMPI_ERROR; + } + } + + /* lookup the other side's info - if a non-blocking form + * of lookup isn't available, then we use the blocking + * form and trust that the underlying system will WAIT + * until the other side publishes its data */ + OBJ_CONSTRUCT(&ilist, opal_list_t); + pdat = OBJ_NEW(opal_pmix_pdata_t); + if (send_first) { + (void)asprintf(&pdat->key, "%s:accept", port_string); + } else { + (void)asprintf(&pdat->key, "%s:connect", port_string); + } + opal_list_append(&ilist, &pdat->super); + if (NULL == opal_pmix.lookup_nb) { + rc = opal_pmix.lookup(OPAL_PMIX_SESSION, &ilist); + if (OPAL_SUCCESS != rc) { + OPAL_LIST_DESTRUCT(&ilist); + opal_argv_free(members); + return OMPI_ERROR; + } + } else { + /* specifically request that the lookup wait until + * the given data has been published */ + char **keys = NULL; + struct lookup_caddy_t caddy; + opal_argv_append_nosize(&keys, pdat->key); + caddy.active = true; + caddy.pdat = pdat; + rc = opal_pmix.lookup_nb(OPAL_PMIX_SESSION, true, keys, + lookup_cbfunc, &caddy); + if (OPAL_SUCCESS != rc) { + OPAL_LIST_DESTRUCT(&ilist); + opal_argv_free(keys); + opal_argv_free(members); + return OMPI_ERROR; + } + OMPI_WAIT_FOR_COMPLETION(caddy.active); + } + /* initiate a list of participants for the connect, + * starting with our own members, remembering to + * skip the first member if we are the root rank */ + if (rank == root) { + j = 1; + } else { + j = 0; + } + OBJ_CONSTRUCT(&mlist, opal_list_t); + for (i=j; NULL != members[i]; i++) { + nm = OBJ_NEW(opal_namelist_t); + opal_convert_string_to_process_name(&nm->name, members[i]); + opal_list_append(&mlist, &nm->super); + } + opal_argv_free(members); + members = NULL; + + /* the pdat object will contain a colon-delimited list + * of process names for the remote procs - convert it + * into an argv array */ + 
members = opal_argv_split(pdat->value.data.string, ':'); + OPAL_LIST_DESTRUCT(&ilist); + + /* the first entry is the root for the remote side */ + opal_convert_string_to_process_name(&pname, members[0]); + + /* add the list of remote procs to our list, and + * keep a list of them for later */ + OBJ_CONSTRUCT(&ilist, opal_list_t); + OBJ_CONSTRUCT(&rlist, opal_list_t); + for (i=1; NULL != members[i]; i++) { + nm = OBJ_NEW(opal_namelist_t); + opal_convert_string_to_process_name(&nm->name, members[i]); + opal_list_append(&mlist, &nm->super); + /* see if this needs to be added to our ompi_proc_t array */ + proc = ompi_proc_find_and_add(&nm->name, &isnew); + if (isnew) { + cd = OBJ_NEW(ompi_dpm_proct_caddy_t); + cd->p = proc; + opal_list_append(&ilist, &cd->super); + } + /* either way, add to the remote list */ + cd = OBJ_NEW(ompi_dpm_proct_caddy_t); + cd->p = proc; + opal_list_append(&rlist, &cd->super); + } + opal_argv_free(members); + + /* tell the host RTE to connect us - this will download + * all known data for the nspace's of participating procs */ + rc = opal_pmix.connect(&mlist); + OPAL_LIST_DESTRUCT(&mlist); + +#if 0 + /* set the locality of the new procs - the required info should + * have been included in the data exchange */ + for (j=0; j < new_proc_len; j++) { + OBJ_CONSTRUCT(&myvals, opal_list_t); + if (OMPI_SUCCESS != (rc = opal_dstore.fetch(opal_dstore_internal, + &new_proc_list[j]->super.proc_name, + OPAL_DSTORE_LOCALITY, &myvals))) { + new_proc_list[j]->super.proc_flags = OPAL_PROC_NON_LOCAL; + } else { + kv = (opal_value_t*)opal_list_get_first(&myvals); + new_proc_list[j]->super.proc_flags = kv->data.uint16; + } + OPAL_LIST_DESTRUCT(&myvals); + } +#endif + if (0 < opal_list_get_size(&ilist)) { + /* convert the list of new procs to a proc_t array */ + new_proc_list = (ompi_proc_t**)calloc(opal_list_get_size(&ilist), + sizeof(ompi_proc_t *)); + i = 0; + OPAL_LIST_FOREACH(cd, &ilist, ompi_dpm_proct_caddy_t) { + new_proc_list[i++] = cd->p; + } + /* call 
add_procs on the new ones */ + rc = MCA_PML_CALL(add_procs(new_proc_list, opal_list_get_size(&ilist))); + free(new_proc_list); + if (OMPI_SUCCESS != rc) { + OMPI_ERROR_LOG(rc); + OPAL_LIST_DESTRUCT(&ilist); + goto exit; + } + } + OPAL_LIST_DESTRUCT(&ilist); + + /* now deal with the remote group */ + rsize = opal_list_get_size(&rlist); + new_group_pointer=ompi_group_allocate(rsize); + if (NULL == new_group_pointer) { + rc = OMPI_ERR_OUT_OF_RESOURCE; + OPAL_LIST_DESTRUCT(&rlist); + goto exit; + } + /* assign group elements */ + i=0; + OPAL_LIST_FOREACH(cd, &rlist, ompi_dpm_proct_caddy_t) { + new_group_pointer->grp_proc_pointers[i++] = cd->p; + } + OPAL_LIST_DESTRUCT(&rlist); + + /* increment proc reference counters */ + ompi_group_increment_proc_count(new_group_pointer); + + /* set up communicator structure */ + rc = ompi_comm_set ( &newcomp, /* new comm */ + comm, /* old comm */ + group->grp_proc_count, /* local_size */ + NULL, /* local_procs */ + rsize, /* remote_size */ + NULL , /* remote_procs */ + NULL, /* attrs */ + comm->error_handler, /* error handler */ + NULL, /* topo component */ + group, /* local group */ + new_group_pointer /* remote group */ + ); + if ( NULL == newcomp ) { + rc = OMPI_ERR_OUT_OF_RESOURCE; + goto exit; + } + + ompi_group_decrement_proc_count (new_group_pointer); + OBJ_RELEASE(new_group_pointer); + new_group_pointer = MPI_GROUP_NULL; + + /* allocate comm_cid */ + rc = ompi_comm_nextcid ( newcomp, /* new communicator */ + comm, /* old communicator */ + NULL, /* bridge comm */ + &root, /* local leader */ + &pname, /* remote leader */ + OMPI_COMM_CID_INTRA_OOB, /* mode */ + send_first ); /* send or recv first */ + if (OMPI_SUCCESS != rc) { + goto exit; + } + + /* activate comm and init coll-component */ + rc = ompi_comm_activate ( &newcomp, /* new communicator */ + comm, /* old communicator */ + NULL, /* bridge comm */ + &root, /* local leader */ + &pname, /* remote leader */ + OMPI_COMM_CID_INTRA_OOB, /* mode */ + send_first ); /* send or 
recv first */ + if (OMPI_SUCCESS != rc) { + goto exit; + } + + /* Question: do we have to re-start some low level stuff + to enable the usage of fast communication devices + between the two worlds ? + */ + + exit: + if (NULL != proc_list) { + free (proc_list); + } + if (NULL != new_proc_list) { + free (new_proc_list); + } + if (OMPI_SUCCESS != rc) { + if (MPI_COMM_NULL != newcomp && NULL != newcomp) { + OBJ_RETAIN(newcomp); + newcomp = MPI_COMM_NULL; + } + } + + *newcomm = newcomp; + return rc; +} + +static int construct_peers(ompi_group_t *group, opal_list_t *peers) +{ + int i; + opal_namelist_t *nm, *n2; + ompi_proc_t *proct; + + if (OMPI_GROUP_IS_DENSE(group)) { + for (i=0; i < group->grp_proc_count; i++) { + if (NULL == (proct = group->grp_proc_pointers[i])) { + OMPI_ERROR_LOG(ORTE_ERR_NOT_FOUND); + return OMPI_ERR_NOT_FOUND; + } + /* add to the list of peers */ + nm = OBJ_NEW(opal_namelist_t); + nm->name = *(opal_process_name_t*)&proct->super.proc_name; + /* need to maintain an ordered list to ensure the tracker signatures + * match across all procs */ + OPAL_LIST_FOREACH(n2, peers, opal_namelist_t) { + if (opal_compare_proc(nm->name, n2->name) < 0) { + opal_list_insert_pos(peers, &n2->super, &nm->super); + nm = NULL; + break; + } + } + if (NULL != nm) { + /* append to the end */ + opal_list_append(peers, &nm->super); + } + } + } else { + for (i=0; i < group->grp_proc_count; i++) { + /* lookup this proc_t to get the process name */ + if (NULL == (proct = ompi_group_peer_lookup(group, i))) { + OMPI_ERROR_LOG(OMPI_ERR_NOT_FOUND); + return OMPI_ERR_NOT_FOUND; + } + /* add to the list of peers */ + nm = OBJ_NEW(opal_namelist_t); + nm->name = *(opal_process_name_t*)&proct->super.proc_name; + /* need to maintain an ordered list to ensure the tracker signatures + * match across all procs */ + OPAL_LIST_FOREACH(n2, peers, opal_namelist_t) { + if (opal_compare_proc(nm->name, n2->name) < 0) { + opal_list_insert_pos(peers, &n2->super, &nm->super); + nm = NULL; + break; + 
} + } + if (NULL != nm) { + /* append to the end */ + opal_list_append(peers, &nm->super); + } + } + } + return ORTE_SUCCESS; +} + +int ompi_dpm_disconnect(ompi_communicator_t *comm) +{ + int ret; + ompi_group_t *group; + opal_list_t coll; + + /* Note that we explicitly use an RTE-based barrier (vs. an MPI + barrier). See a lengthy comment in + ompi/runtime/ompi_mpi_finalize.c for a much more detailed + rationale. */ + + /* setup the collective */ + OBJ_CONSTRUCT(&coll, opal_list_t); + /* RHC: assuming for now that this must flow across all + * local and remote group members */ + group = comm->c_local_group; + if (OMPI_SUCCESS != (ret = construct_peers(group, &coll))) { + OMPI_ERROR_LOG(ret); + OPAL_LIST_DESTRUCT(&coll); + return ret; + } + /* do the same for the remote group */ + group = comm->c_remote_group; + if (OMPI_SUCCESS != (ret = construct_peers(group, &coll))) { + OMPI_ERROR_LOG(ret); + OPAL_LIST_DESTRUCT(&coll); + return ret; + } + + opal_pmix.fence(&coll, false); + OPAL_LIST_DESTRUCT(&coll); + + return OMPI_SUCCESS; +} + +int ompi_dpm_spawn(int count, const char *array_of_commands[], + char **array_of_argv[], + const int array_of_maxprocs[], + const MPI_Info array_of_info[], + const char *port_name) +{ + int rc, i, j; + int have_wdir=0; + int flag=0; + char cwd[OPAL_PATH_MAX]; + char host[OPAL_MAX_INFO_VAL]; /*** should define OMPI_HOST_MAX ***/ + char prefix[OPAL_MAX_INFO_VAL]; + char stdin_target[OPAL_MAX_INFO_VAL]; + char params[OPAL_MAX_INFO_VAL]; + char mapper[OPAL_MAX_INFO_VAL]; + char slot_list[OPAL_MAX_INFO_VAL]; + uint32_t ui32; + bool personality = false; + opal_jobid_t jobid; + + opal_list_t apps; + opal_list_t job_info; + opal_pmix_app_t *app; + opal_pmix_info_t *info; + bool local_spawn, non_mpi; + char **envars; + + /* parse the info object */ + /* check potentially for: + - "host": desired host where to spawn the processes + - "hostfile": hostfile containing hosts where procs are + to be spawned + - "add-host": add the specified hosts to 
the known list + of available resources and spawn these + procs on them + - "add-hostfile": add the hosts in the hostfile to the + known list of available resources and spawn + these procs on them + - "env": a newline-delimited list of envar values to be + placed into the app's environment (of form "foo=bar") + - "ompi_prefix": the path to the root of the directory tree where ompi + executables and libraries can be found on all nodes + used to spawn these procs + - "arch": desired architecture + - "wdir": directory, where executable can be found + - "path": list of directories where to look for the executable + - "file": filename, where additional information is provided. + - "soft": see page 92 of MPI-2. + - "mapper": indicate the mapper to be used for the job + - "display_map": display the map of the spawned job + - "npernode": number of procs/node to spawn + - "pernode": spawn one proc/node + - "ppr": spawn specified number of procs per specified object + - "map_by": specify object by which the procs should be mapped + - "rank_by": specify object by which the procs should be ranked + - "bind_to": specify object to which the procs should be bound + - "ompi_preload_binary": move binaries to nodes prior to execution + - "ompi_preload_files": move specified files to nodes prior to execution + - "ompi_non_mpi": spawned job will not call MPI_Init + - "ompi_param": list of MCA params to be in the spawned job's environment + - "env": newline (\n) delimited list of envar values to be passed to spawned procs + */ + + /* setup the job object */ + OBJ_CONSTRUCT(&job_info, opal_list_t); + OBJ_CONSTRUCT(&apps, opal_list_t); + + /* Convert the list of commands to list of opal_pmix_app_t */ + for (i = 0; i < count; ++i) { + app = OBJ_NEW(opal_pmix_app_t); + if (NULL == app) { + ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); + OPAL_LIST_DESTRUCT(&apps); + opal_progress_event_users_decrement(); + return ORTE_ERR_OUT_OF_RESOURCE; + } + /* add the app to the job data */ + 
opal_list_append(&apps, &app->super); + + /* copy over the name of the executable */ + app->cmd = strdup(array_of_commands[i]); + opal_argv_append(&app->argc, &app->argv, app->cmd); + + /* record the number of procs to be generated */ + app->maxprocs = array_of_maxprocs[i]; + + /* copy over the argv array */ + if (MPI_ARGVS_NULL != array_of_argv && + MPI_ARGV_NULL != array_of_argv[i]) { + for (j=1; NULL != array_of_argv[i][j]; j++) { + opal_argv_append(&app->argc, &app->argv, array_of_argv[i][j-1]); + } + } + + /* Add environment variable with the contact information for the + child processes. + */ + opal_setenv("OMPI_PARENT_PORT", port_name, true, &app->env); + for (j = 0; NULL != environ[j]; ++j) { + if (0 == strncmp(OPAL_MCA_PREFIX, environ[j], strlen(OPAL_MCA_PREFIX))) { + opal_argv_append_nosize(&app->env, environ[j]); + } + } + + /* Check for well-known info keys */ + have_wdir = 0; + if ( array_of_info != NULL && array_of_info[i] != MPI_INFO_NULL ) { + + /* check for personality - this is a job-level key */ + ompi_info_get (array_of_info[i], "personality", sizeof(host) - 1, host, &flag); + if ( flag ) { + personality = true; + info = OBJ_NEW(opal_pmix_info_t); + info->key = strdup(OPAL_PMIX_PERSONALITY); + opal_value_load(&info->value, host, OPAL_STRING); + opal_list_append(&job_info, &info->super); + } + + /* check for 'host' */ + ompi_info_get (array_of_info[i], "host", sizeof(host) - 1, host, &flag); + if ( flag ) { + info = OBJ_NEW(opal_pmix_info_t); + info->key = strdup(OPAL_PMIX_HOST); + opal_value_load(&info->value, host, OPAL_STRING); + opal_list_append(&app->info, &info->super); + } + + /* check for 'hostfile' */ + ompi_info_get (array_of_info[i], "hostfile", sizeof(host) - 1, host, &flag); + if ( flag ) { + info = OBJ_NEW(opal_pmix_info_t); + info->key = strdup(OPAL_PMIX_HOSTFILE); + opal_value_load(&info->value, host, OPAL_STRING); + opal_list_append(&app->info, &info->super); + } + + /* check for 'add-hostfile' */ + ompi_info_get 
(array_of_info[i], "add-hostfile", sizeof(host) - 1, host, &flag); + if ( flag ) { + info = OBJ_NEW(opal_pmix_info_t); + info->key = strdup(OPAL_PMIX_ADD_HOSTFILE); + opal_value_load(&info->value, host, OPAL_STRING); + opal_list_append(&app->info, &info->super); + } + + /* check for 'add-host' */ + ompi_info_get (array_of_info[i], "add-host", sizeof(host) - 1, host, &flag); + if ( flag ) { + info = OBJ_NEW(opal_pmix_info_t); + info->key = strdup(OPAL_PMIX_ADD_HOST); + opal_value_load(&info->value, host, OPAL_STRING); + opal_list_append(&app->info, &info->super); + } + + /* check for env */ + ompi_info_get (array_of_info[i], "env", sizeof(host)-1, host, &flag); + if ( flag ) { + envars = opal_argv_split(host, '\n'); + for (j=0; NULL != envars[j]; j++) { + opal_argv_append_nosize(&app->env, envars[j]); + } + opal_argv_free(envars); + } + + /* 'path', 'arch', 'file', 'soft' -- to be implemented */ + + /* check for 'ompi_prefix' (OMPI-specific -- to effect the same + * behavior as --prefix option to orterun) + * + * This is a job-level key + */ + ompi_info_get (array_of_info[i], "ompi_prefix", sizeof(prefix) - 1, prefix, &flag); + if ( flag ) { + info = OBJ_NEW(opal_pmix_info_t); + info->key = strdup(OPAL_PMIX_PREFIX); + opal_value_load(&info->value, prefix, OPAL_STRING); + opal_list_append(&job_info, &info->super); + } + + /* check for 'wdir' */ + ompi_info_get (array_of_info[i], "wdir", sizeof(cwd) - 1, cwd, &flag); + if ( flag ) { + info = OBJ_NEW(opal_pmix_info_t); + info->key = strdup(OPAL_PMIX_WDIR); + opal_value_load(&info->value, cwd, OPAL_STRING); + opal_list_append(&app->info, &info->super); + have_wdir = 1; + } + + /* check for 'mapper' - a job-level key */ + ompi_info_get(array_of_info[i], "mapper", sizeof(mapper) - 1, mapper, &flag); + if ( flag ) { + info = OBJ_NEW(opal_pmix_info_t); + info->key = strdup(OPAL_PMIX_MAPPER); + opal_value_load(&info->value, mapper, OPAL_STRING); + opal_list_append(&job_info, &info->super); + } + + /* check for 'display_map' 
- a job-level key */ + ompi_info_get_bool(array_of_info[i], "display_map", &local_spawn, &flag); + if ( flag ) { + info = OBJ_NEW(opal_pmix_info_t); + info->key = strdup(OPAL_PMIX_DISPLAY_MAP); + opal_value_load(&info->value, &local_spawn, OPAL_BOOL); + opal_list_append(&job_info, &info->super); + } + + /* check for 'npernode' and 'ppr' - job-level key */ + ompi_info_get (array_of_info[i], "npernode", sizeof(slot_list) - 1, slot_list, &flag); + if ( flag ) { + info = OBJ_NEW(opal_pmix_info_t); + info->key = strdup(OPAL_PMIX_PPR); + info->value.type = OPAL_STRING; + (void)asprintf(&(info->value.data.string), "%s:n", slot_list); + opal_list_append(&job_info, &info->super); + } + ompi_info_get (array_of_info[i], "pernode", sizeof(slot_list) - 1, slot_list, &flag); + if ( flag ) { + info = OBJ_NEW(opal_pmix_info_t); + info->key = strdup(OPAL_PMIX_PPR); + opal_value_load(&info->value, "1:n", OPAL_STRING); + opal_list_append(&job_info, &info->super); + } + ompi_info_get (array_of_info[i], "ppr", sizeof(slot_list) - 1, slot_list, &flag); + if ( flag ) { + info = OBJ_NEW(opal_pmix_info_t); + info->key = strdup(OPAL_PMIX_PPR); + opal_value_load(&info->value, slot_list, OPAL_STRING); + opal_list_append(&job_info, &info->super); + } + + /* check for 'map_by' - job-level key */ + ompi_info_get(array_of_info[i], "map_by", sizeof(slot_list) - 1, slot_list, &flag); + if ( flag ) { + info = OBJ_NEW(opal_pmix_info_t); + info->key = strdup(OPAL_PMIX_MAPBY); + opal_value_load(&info->value, slot_list, OPAL_STRING); + opal_list_append(&job_info, &info->super); + } + + /* check for 'rank_by' - job-level key */ + ompi_info_get(array_of_info[i], "rank_by", sizeof(slot_list) - 1, slot_list, &flag); + if ( flag ) { + info = OBJ_NEW(opal_pmix_info_t); + info->key = strdup(OPAL_PMIX_RANKBY); + opal_value_load(&info->value, slot_list, OPAL_STRING); + opal_list_append(&job_info, &info->super); + } + +#if OPAL_HAVE_HWLOC + /* check for 'bind_to' - job-level key */ + 
ompi_info_get(array_of_info[i], "bind_to", sizeof(slot_list) - 1, slot_list, &flag); + if ( flag ) { + info = OBJ_NEW(opal_pmix_info_t); + info->key = strdup(OPAL_PMIX_BINDTO); + opal_value_load(&info->value, slot_list, OPAL_STRING); + opal_list_append(&job_info, &info->super); + } +#endif + + /* check for 'preload_binary' - job-level key */ + ompi_info_get_bool(array_of_info[i], "ompi_preload_binary", &local_spawn, &flag); + if ( flag ) { + info = OBJ_NEW(opal_pmix_info_t); + info->key = strdup(OPAL_PMIX_PRELOAD_BIN); + opal_value_load(&info->value, &local_spawn, OPAL_BOOL); + opal_list_append(&job_info, &info->super); + } + + /* check for 'preload_files' - job-level key */ + ompi_info_get (array_of_info[i], "ompi_preload_files", sizeof(cwd) - 1, cwd, &flag); + if ( flag ) { + info = OBJ_NEW(opal_pmix_info_t); + info->key = strdup(OPAL_PMIX_PRELOAD_FILES); + opal_value_load(&info->value, cwd, OPAL_STRING); + opal_list_append(&job_info, &info->super); + } + + /* see if this is a non-mpi job - if so, then set the flag so ORTE + * knows what to do - job-level key + */ + ompi_info_get_bool(array_of_info[i], "ompi_non_mpi", &non_mpi, &flag); + if (flag && non_mpi) { + info = OBJ_NEW(opal_pmix_info_t); + info->key = strdup(OPAL_PMIX_NON_PMI); + opal_value_load(&info->value, &non_mpi, OPAL_BOOL); + opal_list_append(&job_info, &info->super); + } + + /* see if this is an MCA param that the user wants applied to the child job */ + ompi_info_get (array_of_info[i], "ompi_param", sizeof(params) - 1, params, &flag); + if ( flag ) { + opal_argv_append_unique_nosize(&app->env, params, true); + } + + /* see if user specified what to do with stdin - defaults to + * not forwarding stdin to child processes - job-level key + */ + ompi_info_get (array_of_info[i], "ompi_stdin_target", sizeof(stdin_target) - 1, stdin_target, &flag); + if ( flag ) { + if (0 == strcmp(stdin_target, "all")) { + ui32 = ORTE_VPID_WILDCARD; + } else if (0 == strcmp(stdin_target, "none")) { + ui32 = 
ORTE_VPID_INVALID; + } else { + ui32 = strtoul(stdin_target, NULL, 10); + } + info = OBJ_NEW(opal_pmix_info_t); + info->key = strdup(OPAL_PMIX_STDIN_TGT); + opal_value_load(&info->value, &ui32, OPAL_UINT32); + opal_list_append(&job_info, &info->super); + } + } + + /* default value: If the user did not tell us where to look for the + * executable, we assume the current working directory + */ + if ( !have_wdir ) { + if (OMPI_SUCCESS != (rc = opal_getcwd(cwd, OPAL_PATH_MAX))) { + ORTE_ERROR_LOG(rc); + OPAL_LIST_DESTRUCT(&apps); + opal_progress_event_users_decrement(); + return rc; + } + info = OBJ_NEW(opal_pmix_info_t); + info->key = strdup(OPAL_PMIX_WDIR); + opal_value_load(&info->value, cwd, OPAL_STRING); + opal_list_append(&app->info, &info->super); + } + + /* leave the map info alone - the launcher will + * decide where to put things + */ + } /* for (i = 0 ; i < count ; ++i) */ + + /* default the personality - job-level key */ + if (!personality) { + info = OBJ_NEW(opal_pmix_info_t); + info->key = strdup(OPAL_PMIX_PERSONALITY); + opal_value_load(&info->value, "ompi", OPAL_STRING); + opal_list_append(&job_info, &info->super); + } + + /* spawn procs */ + rc = opal_pmix.spawn(&job_info, &apps, &jobid); + OPAL_LIST_DESTRUCT(&job_info); + OPAL_LIST_DESTRUCT(&apps); + + if (OPAL_SUCCESS != rc) { + OMPI_ERROR_LOG(rc); + opal_progress_event_users_decrement(); + return MPI_ERR_SPAWN; + } + + return OMPI_SUCCESS; +} + +/* Create a rendezvous tag consisting of our name + a random number */ +int ompi_dpm_open_port(char *port_name) +{ + uint32_t r; + + r = opal_rand(&rnd); + snprintf(port_name, MPI_MAX_PORT_NAME, "%s:%u", + OMPI_NAME_PRINT(OMPI_PROC_MY_NAME), r); + return OMPI_SUCCESS; +} + +int ompi_dpm_close_port(const char *port_name) +{ + /* nothing to do here - user is responsible for the memory */ + return OMPI_SUCCESS; +} + +int ompi_dpm_dyn_init(void) +{ + int root=0, rc; + bool send_first = true; + ompi_communicator_t *newcomm=NULL; + char *port_name=NULL, *tmp, *ptr; 
+ + /* check for appropriate env variable */ + tmp = getenv("OMPI_PARENT_PORT"); + if (NULL == tmp) { + /* nothing to do */ + return OMPI_SUCCESS; + } + + /* the value passed to us may have quote marks around it to protect + * the value if passed on the command line. We must remove those + * to have a correct string + */ + if ('"' == tmp[0]) { + /* if the first char is a quote, then so will the last one be */ + tmp[strlen(tmp)-1] = '\0'; + ptr = &tmp[1]; + } else { + ptr = &tmp[0]; + } + port_name = strdup(ptr); + + rc = ompi_dpm_connect_accept(MPI_COMM_WORLD, root, port_name, send_first, &newcomm); + free(port_name); + if (OMPI_SUCCESS != rc) { + return rc; + } + + /* originally, we set comm_parent to comm_null (in comm_init), + * now we have to decrease the reference counters to the according + * objects + */ + OBJ_RELEASE(ompi_mpi_comm_parent->c_local_group); + OBJ_RELEASE(ompi_mpi_comm_parent->error_handler); + OBJ_RELEASE(ompi_mpi_comm_parent); + + /* Set the parent communicator */ + ompi_mpi_comm_parent = newcomm; + + /* Set name for debugging purposes */ + snprintf(newcomm->c_name, MPI_MAX_OBJECT_NAME, "MPI_COMM_PARENT"); + newcomm->c_flags |= OMPI_COMM_NAMEISSET; + + return OMPI_SUCCESS; +} + + +/* + * finalize the module + */ +int ompi_dpm_finalize(void) +{ + return OMPI_SUCCESS; +} + + +/**********************************************************************/ +/**********************************************************************/ +/**********************************************************************/ +/* this routine runs through the list of communicators + and does the disconnect for all dynamic communicators */ +int ompi_dpm_dyn_finalize(void) +{ + int i,j=0, max=0; + ompi_dpm_disconnect_obj **objs=NULL; + ompi_communicator_t *comm=NULL; + + if (1 size = ompi_comm_remote_size(comm); + } else { + obj->size = ompi_comm_size(comm); + } + + obj->comm = comm; + obj->reqs = (ompi_request_t**)malloc(2*obj->size*sizeof(ompi_request_t *)); + if (NULL == 
obj->reqs) { + opal_output(0, "Could not allocate request array for disconnect object"); + free(obj); + return NULL; + } + + /* initiate all isend_irecvs. We use a dummy buffer stored on + the object, since we are sending zero size messages anyway. */ + for (i=0; i < obj->size; i++) { + ret = MCA_PML_CALL(irecv(&(obj->buf), 0, MPI_INT, i, + OMPI_COMM_BARRIER_TAG, comm, + &(obj->reqs[2*i]))); + + if (OMPI_SUCCESS != ret) { + opal_output(0, "dpm_disconnect_init: error %d in irecv to process %d", ret, i); + free(obj->reqs); + free(obj); + return NULL; + } + ret = MCA_PML_CALL(isend(&(obj->buf), 0, MPI_INT, i, + OMPI_COMM_BARRIER_TAG, + MCA_PML_BASE_SEND_SYNCHRONOUS, + comm, &(obj->reqs[2*i+1]))); + + if (OMPI_SUCCESS != ret) { + opal_output(0, "dpm_disconnect_init: error %d in isend to process %d", ret, i); + free(obj->reqs); + free(obj); + return NULL; + } + } + + /* return handle */ + return obj; +} +/**********************************************************************/ +/**********************************************************************/ +/**********************************************************************/ +/* - count how many requests are active + * - generate a request array large enough to hold + all active requests + * - call waitall on the overall request array + * - free the objects + */ +static int disconnect_waitall (int count, ompi_dpm_disconnect_obj **objs) +{ + + ompi_request_t **reqs=NULL; + char *treq=NULL; + int totalcount = 0; + int i; + int ret; + + for (i=0; isize; + } + + reqs = (ompi_request_t**)malloc(2*totalcount*sizeof(ompi_request_t *)); + if (NULL == reqs) { + opal_output(0, "ompi_comm_disconnect_waitall: error allocating memory"); + return OMPI_ERROR; + } + + /* generate a single, large array of pending requests */ + treq = (char *)reqs; + for (i=0; ireqs, 2*objs[i]->size * sizeof(ompi_request_t *)); + treq += 2*objs[i]->size * sizeof(ompi_request_t *); + } + + /* force all non-blocking all-to-alls to finish */ + ret = 
ompi_request_wait_all(2*totalcount, reqs, MPI_STATUSES_IGNORE); + + /* Finally, free everything */ + for (i=0; i< count; i++ ) { + if (NULL != objs[i]->reqs ) { + free(objs[i]->reqs ); + free(objs[i]); + } + } + + free(reqs); + + return ret; +} + +/**********************************************************************/ +/**********************************************************************/ +/**********************************************************************/ +/* All we want to do in this function is determine if the number of + * jobids in the local and/or remote group is > 1. This tells us to + * set the disconnect flag. We don't actually care what the true + * number -is-, only that it is > 1 + */ +void ompi_dpm_mark_dyncomm(ompi_communicator_t *comm) +{ + int i; + int size, rsize; + bool found=false; + ompi_jobid_t thisjobid; + ompi_group_t *grp=NULL; + ompi_proc_t *proc = NULL; + + /* special case for MPI_COMM_NULL */ + if (comm == MPI_COMM_NULL) { + return; + } + + size = ompi_comm_size(comm); + rsize = ompi_comm_remote_size(comm); + + /* loop over all processes in local group and check for + * a different jobid + */ + grp = comm->c_local_group; + proc = ompi_group_peer_lookup(grp,0); + thisjobid = ((ompi_process_name_t*)&proc->super.proc_name)->jobid; + + for (i=1; i< size; i++) { + proc = ompi_group_peer_lookup(grp,i); + if (thisjobid != ((ompi_process_name_t*)&proc->super.proc_name)->jobid) { + /* at least one is different */ + found = true; + goto complete; + } + } + + /* if inter-comm, loop over all processes in remote_group + * and see if any are different from thisjobid + */ + grp = comm->c_remote_group; + for (i=0; i< rsize; i++) { + proc = ompi_group_peer_lookup(grp,i); + if (thisjobid != ((ompi_process_name_t*)&proc->super.proc_name)->jobid) { + /* at least one is different */ + found = true; + break; + } + } + + complete: + /* if a different jobid was found, set the disconnect flag*/ + if (found) { + ompi_comm_num_dyncomm++; + 
OMPI_COMM_SET_DYNAMIC(comm); + } + + return; +} diff --git a/ompi/dpm/dpm.h b/ompi/dpm/dpm.h new file mode 100644 index 0000000000..34084480f8 --- /dev/null +++ b/ompi/dpm/dpm.h @@ -0,0 +1,106 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2011 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2013-2015 Intel, Inc. All rights reserved + * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights + * reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +/** + * @file + * + * Dynamic Process Management Interface + * + */ + +#ifndef OMPI_DPM_H +#define OMPI_DPM_H + +#include "ompi_config.h" + +#include "ompi/info/info.h" +#include "ompi/communicator/communicator.h" + +BEGIN_C_DECLS + +/* + * Initialize the DPM system + */ +int ompi_dpm_init(void); + +/* + * Connect/accept communications + */ +int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root, + const char *port, bool send_first, + ompi_communicator_t **newcomm); + +/** + * Executes internally a disconnect on all dynamic communicators + * in case the user did not disconnect them. 
+ */ +int ompi_dpm_disconnect(ompi_communicator_t *comm); + +/* + * Dynamically spawn processes + */ +int ompi_dpm_spawn(int count, char const *array_of_commands[], + char **array_of_argv[], + const int array_of_maxprocs[], + const MPI_Info array_of_info[], + const char *port_name); + +/* + * This routine checks, whether an application has been spawned + * by another MPI application, or has been independently started. + * If it has been spawned, it establishes the parent communicator. + * Since the routine has to communicate, it should be among the last + * steps in MPI_Init, to be sure that everything is already set up. + */ +int ompi_dpm_dyn_init(void); + +/* + * Interface for mpi_finalize to call to ensure dynamically spawned procs + * collectively finalize + */ +int ompi_dpm_dyn_finalize(void); + +/* this routine counts the number of different jobids of the processes + given in a certain communicator. If there is more than one jobid, + we mark the communicator as 'dynamic'. This is especially relevant + for the MPI_Comm_disconnect *and* for MPI_Finalize, where we have + to wait for all still connected processes. +*/ +void ompi_dpm_mark_dyncomm(ompi_communicator_t *comm); + +/* + * Define a rendezvous point for a dynamically spawned job + */ +int ompi_dpm_open_port(char *port_name); + +/* + * Unpublish the rendezvous point + */ +int ompi_dpm_close_port(const char *port_name); + +/* + * Finalize the DPM + */ +int ompi_dpm_finalize(void); + +END_C_DECLS + +#endif /* OMPI_DPM_H */ diff --git a/ompi/mca/dpm/Makefile.am b/ompi/mca/dpm/Makefile.am deleted file mode 100644 index 7502d7187b..0000000000 --- a/ompi/mca/dpm/Makefile.am +++ /dev/null @@ -1,37 +0,0 @@ -# -# Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. 
-# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# main library setup -noinst_LTLIBRARIES = libmca_dpm.la -libmca_dpm_la_SOURCES = - -# local files -headers = dpm.h -libmca_dpm_la_SOURCES += $(headers) - -# Conditionally install the header files -if WANT_INSTALL_HEADERS -ompidir = $(ompiincludedir)/$(subdir) -nobase_ompi_HEADERS = $(headers) -endif - -include base/Makefile.am - -distclean-local: - rm -f base/static-components.h diff --git a/ompi/mca/dpm/base/base.h b/ompi/mca/dpm/base/base.h deleted file mode 100644 index 3e34a0b8a1..0000000000 --- a/ompi/mca/dpm/base/base.h +++ /dev/null @@ -1,101 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2013 Intel, Inc. 
All rights reserved - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -#ifndef OMPI_MCA_DPM_BASE_H -#define OMPI_MCA_DPM_BASE_H - -#include "ompi_config.h" -#include "ompi/constants.h" - -#include -#if HAVE_SYS_TIME_H -#include -#endif - -#include "ompi/mca/dpm/dpm.h" - -/* - * Global functions for MCA overall DPM - */ - -BEGIN_C_DECLS - -struct ompi_dpm_base_disconnect_obj { - ompi_communicator_t *comm; - int size; - struct ompi_request_t **reqs; - int buf; -}; -typedef struct ompi_dpm_base_disconnect_obj ompi_dpm_base_disconnect_obj; - -/** - * Select an available component. - * - * @retval OMPI_SUCCESS Upon Success - * @retval OMPI_NOT_FOUND If no component can be selected - * @retval OMPI_ERROR Upon other failure - * - */ -OMPI_DECLSPEC int ompi_dpm_base_select(void); - -/* Internal support functions */ -OMPI_DECLSPEC char* ompi_dpm_base_dyn_init (void); -OMPI_DECLSPEC int ompi_dpm_base_dyn_finalize (void); -OMPI_DECLSPEC void ompi_dpm_base_mark_dyncomm (ompi_communicator_t *comm); -OMPI_DECLSPEC ompi_dpm_base_disconnect_obj *ompi_dpm_base_disconnect_init ( ompi_communicator_t *comm); -OMPI_DECLSPEC int ompi_dpm_base_disconnect_waitall (int count, ompi_dpm_base_disconnect_obj **objs); - -/* NULL component functions */ -int ompi_dpm_base_null_connect_accept (ompi_communicator_t *comm, int root, - const char *port_string, bool send_first, - ompi_communicator_t **newcomm); -int ompi_dpm_base_null_disconnect(ompi_communicator_t *comm); -int ompi_dpm_base_null_spawn(int count, const char *array_of_commands[], - char **array_of_argv[], - const int array_of_maxprocs[], - const MPI_Info array_of_info[], - const char *port_name); -int ompi_dpm_base_null_dyn_init(void); -int ompi_dpm_base_null_dyn_finalize (void); -void ompi_dpm_base_null_mark_dyncomm (ompi_communicator_t *comm); -int ompi_dpm_base_null_open_port(char *port_name, ompi_rml_tag_t given_tag); -int ompi_dpm_base_null_parse_port(const char *port_name, - char **hnp_uri, char 
**rml_uri, ompi_rml_tag_t *tag); -int ompi_dpm_base_null_route_to_port(char *rml_uri, ompi_process_name_t *rproc); -int ompi_dpm_base_null_close_port(const char *port_name); -int ompi_dpm_base_null_pconnect(char *port, - struct timeval *timeout, - ompi_dpm_base_paccept_connect_callback_fn_t cbfunc, - void *cbdata); -int ompi_dpm_base_null_paccept(char *port, - ompi_dpm_base_paccept_connect_callback_fn_t cbfunc, - void *cbdata); -void ompi_dpm_base_null_pclose(char *port); - -/* useful globals */ -OMPI_DECLSPEC extern ompi_dpm_base_component_t ompi_dpm_base_selected_component; -OMPI_DECLSPEC extern ompi_dpm_base_module_t ompi_dpm; - -OMPI_DECLSPEC extern mca_base_framework_t ompi_dpm_base_framework; - -END_C_DECLS - -#endif /* OMPI_MCA_DPM_BASE_H */ diff --git a/ompi/mca/dpm/base/dpm_base_common_fns.c b/ompi/mca/dpm/base/dpm_base_common_fns.c deleted file mode 100644 index e710d9a494..0000000000 --- a/ompi/mca/dpm/base/dpm_base_common_fns.c +++ /dev/null @@ -1,289 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; -*- */ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2007 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2006-2007 University of Houston. All rights reserved. - * Copyright (c) 2006-2007 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2007-2013 Cisco Systems, Inc. All rights reserved. 
- * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" -#include -#include - -#include "ompi/request/request.h" -#include "ompi/mca/dpm/dpm.h" -#include "ompi/mca/pml/pml.h" - -#include "ompi/mca/dpm/base/base.h" - - -char* ompi_dpm_base_dyn_init (void) -{ - char *envvarname=NULL, *port_name=NULL, *tmp, *ptr; - - /* check for appropriate env variable */ - asprintf(&envvarname, "OMPI_PARENT_PORT"); - tmp = getenv(envvarname); - free (envvarname); - if (NULL != tmp) { - /* the value passed to us may have quote marks around it to protect - * the value if passed on the command line. We must remove those - * to have a correct string - */ - if ('"' == tmp[0]) { - /* if the first char is a quote, then so will the last one be */ - tmp[strlen(tmp)-1] = '\0'; - ptr = &tmp[1]; - } else { - ptr = &tmp[0]; - } - port_name = strdup(ptr); - } - - return port_name; -} - -/**********************************************************************/ -/**********************************************************************/ -/**********************************************************************/ -/* this routine runs through the list of communicators - and does the disconnect for all dynamic communicators */ -int ompi_dpm_base_dyn_finalize (void) -{ - int i,j=0, max=0; - ompi_dpm_base_disconnect_obj **objs=NULL; - ompi_communicator_t *comm=NULL; - - if ( 1 size = ompi_comm_remote_size (comm); - } else { - obj->size = ompi_comm_size (comm); - } - - obj->comm = comm; - obj->reqs = (ompi_request_t **) malloc(2*obj->size*sizeof(ompi_request_t *)); - if ( NULL == obj->reqs ) { - printf("Could not allocate request array for disconnect object\n"); - free (obj); - return NULL; - } - - /* initiate all isend_irecvs. We use a dummy buffer stored on - the object, since we are sending zero size messages anyway. 
*/ - for ( i=0; i < obj->size; i++ ) { - ret = MCA_PML_CALL(irecv (&(obj->buf), 0, MPI_INT, i, - OMPI_COMM_BARRIER_TAG, comm, - &(obj->reqs[2*i]))); - - if ( OMPI_SUCCESS != ret ) { - printf("dpm_base_disconnect_init: error %d in irecv to process %d\n", ret, i); - free (obj->reqs); - free (obj); - return NULL; - } - ret = MCA_PML_CALL(isend (&(obj->buf), 0, MPI_INT, i, - OMPI_COMM_BARRIER_TAG, - MCA_PML_BASE_SEND_SYNCHRONOUS, - comm, &(obj->reqs[2*i+1]))); - - if ( OMPI_SUCCESS != ret ) { - printf("dpm_base_disconnect_init: error %d in isend to process %d\n", ret, i); - free (obj->reqs); - free (obj); - return NULL; - } - } - - /* return handle */ - return obj; -} -/**********************************************************************/ -/**********************************************************************/ -/**********************************************************************/ -/* - count how many requests are active - * - generate a request array large enough to hold - all active requests - * - call waitall on the overall request array - * - free the objects - */ -int ompi_dpm_base_disconnect_waitall (int count, ompi_dpm_base_disconnect_obj **objs) -{ - - ompi_request_t **reqs=NULL; - char *treq=NULL; - int totalcount = 0; - int i; - int ret; - - for (i=0; isize; - } - - reqs = (ompi_request_t **) malloc (2*totalcount*sizeof(ompi_request_t *)); - if ( NULL == reqs ) { - printf("ompi_comm_disconnect_waitall: error allocating memory\n"); - return OMPI_ERROR; - } - - /* generate a single, large array of pending requests */ - treq = (char *)reqs; - for (i=0; ireqs, 2*objs[i]->size * sizeof(ompi_request_t *)); - treq += 2*objs[i]->size * sizeof(ompi_request_t *); - } - - /* force all non-blocking all-to-alls to finish */ - ret = ompi_request_wait_all (2*totalcount, reqs, MPI_STATUSES_IGNORE); - - /* Finally, free everything */ - for (i=0; i< count; i++ ) { - if (NULL != objs[i]->reqs ) { - free (objs[i]->reqs ); - free (objs[i]); - } - } - - free (reqs); - - 
return ret; -} - -/**********************************************************************/ -/**********************************************************************/ -/**********************************************************************/ -/* All we want to do in this function is determine if the number of - * jobids in the local and/or remote group is > 1. This tells us to - * set the disconnect flag. We don't actually care what the true - * number -is-, only that it is > 1 - */ -void ompi_dpm_base_mark_dyncomm (ompi_communicator_t *comm) -{ - int i; - int size, rsize; - bool found=false; - ompi_jobid_t thisjobid; - ompi_group_t *grp=NULL; - ompi_proc_t *proc = NULL; - - /* special case for MPI_COMM_NULL */ - if ( comm == MPI_COMM_NULL ) { - return; - } - - size = ompi_comm_size (comm); - rsize = ompi_comm_remote_size(comm); - - /* loop over all processes in local group and check for - * a different jobid - */ - grp = comm->c_local_group; - proc = ompi_group_peer_lookup(grp,0); - thisjobid = ((ompi_process_name_t*)&proc->super.proc_name)->jobid; - - for (i=1; i< size; i++) { - proc = ompi_group_peer_lookup(grp,i); - if (thisjobid != ((ompi_process_name_t*)&proc->super.proc_name)->jobid) { - /* at least one is different */ - found = true; - goto complete; - } - } - - /* if inter-comm, loop over all processes in remote_group - * and see if any are different from thisjobid - */ - grp = comm->c_remote_group; - for (i=0; i< rsize; i++) { - proc = ompi_group_peer_lookup(grp,i); - if (thisjobid != ((ompi_process_name_t*)&proc->super.proc_name)->jobid) { - /* at least one is different */ - found = true; - break; - } - } - - complete: - /* if a different jobid was found, set the disconnect flag*/ - if (found) { - ompi_comm_num_dyncomm++; - OMPI_COMM_SET_DYNAMIC(comm); - } - - return; -} diff --git a/ompi/mca/dpm/base/dpm_base_frame.c b/ompi/mca/dpm/base/dpm_base_frame.c deleted file mode 100644 index ecac6fae07..0000000000 --- a/ompi/mca/dpm/base/dpm_base_frame.c +++ 
/dev/null @@ -1,68 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2007 The Trustees of Indiana University. - * All rights reserved. - * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. - * All rights reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2013 Intel, Inc. All rights reserved - * Copyright (c) 2013 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "ompi/mca/mca.h" -#include "opal/util/output.h" -#include "opal/mca/base/base.h" - -#include "ompi/mca/dpm/dpm.h" -#include "ompi/mca/dpm/base/base.h" - -#include "ompi/mca/dpm/base/static-components.h" - -/* - * Globals - */ -OMPI_DECLSPEC ompi_dpm_base_module_t ompi_dpm = { - NULL, - ompi_dpm_base_null_connect_accept, - ompi_dpm_base_null_disconnect, - ompi_dpm_base_null_spawn, - ompi_dpm_base_null_dyn_init, - ompi_dpm_base_null_dyn_finalize, - ompi_dpm_base_null_mark_dyncomm, - ompi_dpm_base_null_open_port, - ompi_dpm_base_null_parse_port, - ompi_dpm_base_null_route_to_port, - ompi_dpm_base_null_close_port, - NULL, - ompi_dpm_base_null_pconnect, - ompi_dpm_base_null_paccept, - ompi_dpm_base_null_pclose -}; -ompi_dpm_base_component_t ompi_dpm_base_selected_component = {{0}}; - -static int ompi_dpm_base_close(void) -{ - /* Close the selected component */ - if( NULL != ompi_dpm.finalize ) { - ompi_dpm.finalize(); - } - - /* Close all available modules that are open */ - return mca_base_framework_components_close(&ompi_dpm_base_framework, NULL); -} - -MCA_BASE_FRAMEWORK_DECLARE(ompi, dpm, NULL, NULL, NULL, 
ompi_dpm_base_close, - mca_dpm_base_static_components, 0); diff --git a/ompi/mca/dpm/base/dpm_base_null_fns.c b/ompi/mca/dpm/base/dpm_base_null_fns.c deleted file mode 100644 index 454bc6869d..0000000000 --- a/ompi/mca/dpm/base/dpm_base_null_fns.c +++ /dev/null @@ -1,113 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2007 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2006-2007 University of Houston. All rights reserved. - * Copyright (c) 2006-2013 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2013 Intel, Inc. 
All rights reserved - * - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" -#include -#include -#include -#if HAVE_SYS_TIME_H -#include -#endif - -#include "ompi/mca/dpm/dpm.h" -#include "ompi/mca/dpm/base/base.h" - - -int ompi_dpm_base_null_connect_accept (ompi_communicator_t *comm, int root, - const char *port_string, bool send_first, - ompi_communicator_t **newcomm) -{ - return OMPI_ERR_NOT_SUPPORTED; -} - -int ompi_dpm_base_null_disconnect(ompi_communicator_t *comm) -{ - return OMPI_SUCCESS; -} - -int ompi_dpm_base_null_spawn(int count, const char *array_of_commands[], - char **array_of_argv[], - const int array_of_maxprocs[], - const MPI_Info array_of_info[], - const char *port_name) -{ - return OMPI_ERR_NOT_SUPPORTED; -} - -int ompi_dpm_base_null_dyn_init(void) -{ - return OMPI_SUCCESS; -} - -int ompi_dpm_base_null_dyn_finalize (void) -{ - return OMPI_SUCCESS; -} - -void ompi_dpm_base_null_mark_dyncomm (ompi_communicator_t *comm) -{ - return; -} - -int ompi_dpm_base_null_open_port(char *port_name, ompi_rml_tag_t given_tag) -{ - return OMPI_ERR_NOT_SUPPORTED; -} - -int ompi_dpm_base_null_parse_port(const char *port_name, - char **hnp_uri, char **rml_uri, ompi_rml_tag_t *tag) -{ - return OMPI_ERR_NOT_SUPPORTED; -} - -int ompi_dpm_base_null_route_to_port(char *rml_uri, ompi_process_name_t *rproc) -{ - return OMPI_ERR_NOT_SUPPORTED; -} - -int ompi_dpm_base_null_close_port(const char *port_name) -{ - return OMPI_ERR_NOT_SUPPORTED; -} - -int ompi_dpm_base_null_pconnect(char *port, - struct timeval *timeout, - ompi_dpm_base_paccept_connect_callback_fn_t cbfunc, - void *cbdata) -{ - return OMPI_ERR_NOT_SUPPORTED; -} - -int ompi_dpm_base_null_paccept(char *port, - ompi_dpm_base_paccept_connect_callback_fn_t cbfunc, - void *cbdata) -{ - return OMPI_ERR_NOT_SUPPORTED; -} - -void ompi_dpm_base_null_pclose(char *port) -{ - return; -} diff --git a/ompi/mca/dpm/base/dpm_base_select.c 
b/ompi/mca/dpm/base/dpm_base_select.c deleted file mode 100644 index 81c646d5a3..0000000000 --- a/ompi/mca/dpm/base/dpm_base_select.c +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright (c) 2004-2008 The Trustees of Indiana University. - * All rights reserved. - * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. - * All rights reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2012 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "ompi/mca/mca.h" -#include "opal/mca/base/base.h" - -#include "opal/mca/base/mca_base_component_repository.h" - -#include "ompi/mca/dpm/dpm.h" -#include "ompi/mca/dpm/base/base.h" - - -int ompi_dpm_base_select(void) -{ - int ret; - ompi_dpm_base_component_t *best_component = NULL; - ompi_dpm_base_module_t *best_module = NULL; - - /* - * Select the best component - */ - if( OPAL_SUCCESS != (ret = mca_base_select("dpm", ompi_dpm_base_framework.framework_output, - &ompi_dpm_base_framework.framework_components, - (mca_base_module_t **) &best_module, - (mca_base_component_t **) &best_component))) { - /* it is okay not to find any executable components */ - if (OMPI_ERR_NOT_FOUND == ret) { - ret = OPAL_SUCCESS; - } - goto cleanup; - } - - /* Save the winner */ - ompi_dpm = *best_module; - ompi_dpm_base_selected_component = *best_component; - - /* init the selected module */ - if (NULL != ompi_dpm.init) { - ret = ompi_dpm.init(); - } - - cleanup: - return ret; -} diff --git a/ompi/mca/dpm/base/owner.txt b/ompi/mca/dpm/base/owner.txt deleted file mode 100644 index 4ad6f408ca..0000000000 --- a/ompi/mca/dpm/base/owner.txt +++ /dev/null @@ -1,7 +0,0 @@ -# -# owner/status file -# owner: institution that is 
responsible for this package -# status: e.g. active, maintenance, unmaintained -# -owner: INTEL -status: maintenance diff --git a/ompi/mca/dpm/dpm.h b/ompi/mca/dpm/dpm.h deleted file mode 100644 index 2fc1acc00d..0000000000 --- a/ompi/mca/dpm/dpm.h +++ /dev/null @@ -1,233 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2011 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2013 Intel, Inc. All rights reserved - * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights - * reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - * - * Dynamic Process Management Interface - * - */ - -#ifndef OMPI_MCA_DPM_H -#define OMPI_MCA_DPM_H - -#include "ompi_config.h" - -#include -#if HAVE_SYS_TIME_H -#include -#endif - -#include "ompi/mca/mca.h" -#include "opal/mca/base/base.h" - -#include "ompi/info/info.h" -#include "ompi/communicator/communicator.h" - -BEGIN_C_DECLS - -/* - * Initialize a module - */ -typedef int (*ompi_dpm_base_module_init_fn_t)(void); - -/* - * Connect/accept communications - */ -typedef int (*ompi_dpm_base_module_connect_accept_fn_t)(ompi_communicator_t *comm, int root, - const char *port, bool send_first, - ompi_communicator_t **newcomm); - -/* define a callback function for use by non-blocking persistent connect/accept operations */ -typedef void (*ompi_dpm_base_paccept_connect_callback_fn_t)(ompi_communicator_t *newcomm, - ompi_proc_t *remote_proc, - void *cbdata); - -/* - * Create a persistent connection point for accepting non-blocking connection requests. - * The accept is persistent and will remain open until explicitly closed, or during - * dpm_framework_close. Any incoming connection request will be used to create a new - * communicator which will be returned via callback, along with the process name. - * - * In both cases, the callback function will return the new communicator plus the - * user's original cbdata. - * - * paccept requires a port (typically obtained by a prior call to MPI_Open_port). - * This must be published so it can be found by processes wanting to - * connect to this process, and is passed by those processes as the "port" argument for - * pconnect. - * - * Calls to pconnect are also non-blocking, with callback upon completion. Periodic - * attempts to complete the connection may be made at the discretion of the implementation. - * Failure to connect will be indicated by a callback returning a NULL communicator. 
Callers - * should use the cbdata to track the corresponding pconnect request. A timeout - * is provided to avoid hanging should the other process not have an active paccept - * on the specified port (e.g., the process may have closed it). A NULL value for - * the timeout argument indicates that the pconnect operation should not timeout, - * and will regularly retry the connection forever. - * - * Processes may create and publish as many ports, and call paccept as many times, as - * they like. When a process no longer wishes to accept connect requests, it can "close" - * a paccept request by passing in the port used when calling paccept. A call to "close" - * with a NULL argument will close *all* currently registered paccept channels. - */ -typedef int (*ompi_dpm_base_module_paccept_fn_t)(char *port, - ompi_dpm_base_paccept_connect_callback_fn_t cbfunc, - void *cbdata); - -typedef int (*ompi_dpm_base_module_pconnect_fn_t)(char *port, - struct timeval *timeout, - ompi_dpm_base_paccept_connect_callback_fn_t cbfunc, - void *cbdata); - -typedef void (*ompi_dpm_base_module_pclose_fn_t)(char *port); - - -/** - * Executes internally a disconnect on all dynamic communicators - * in case the user did not disconnect them. - */ -typedef int (*ompi_dpm_base_module_disconnect_fn_t)(ompi_communicator_t *comm); - -/* - * Dynamically spawn processes - */ -typedef int (*ompi_dpm_base_module_spawn_fn_t)(int count, char const *array_of_commands[], - char **array_of_argv[], - const int array_of_maxprocs[], - const MPI_Info array_of_info[], - const char *port_name); - -/* - * This routine checks, whether an application has been spawned - * by another MPI application, or has been independently started. - * If it has been spawned, it establishes the parent communicator. - * Since the routine has to communicate, it should be among the last - * steps in MPI_Init, to be sure that everything is already set up. 
- */ -typedef int (*ompi_dpm_base_module_dyn_init_fn_t)(void); - -/* - * Interface for mpi_finalize to call to ensure dynamically spawned procs - * collectively finalize - */ -typedef int (*ompi_dpm_base_module_dyn_finalize_fn_t)(void); - -/* this routine counts the number of different jobids of the processes - given in a certain communicator. If there is more than one jobid, - we mark the communicator as 'dynamic'. This is especially relevant - for the MPI_Comm_disconnect *and* for MPI_Finalize, where we have - to wait for all still connected processes. -*/ -typedef void (*ompi_dpm_base_module_mark_dyncomm_fn_t)(ompi_communicator_t *comm); - -/* - * Open a port to interface to a dynamically spawned job - if the - * specified tag is valid, then it will be used to form the port. Otherwise, - * a dynamically assigned tag that is unique to this request will be provided - */ -typedef int (*ompi_dpm_base_module_open_port_fn_t)(char *port_name, ompi_rml_tag_t tag); - -/* - * Converts an opaque port string to a RML process nane and tag. 
- */ -typedef int (*ompi_dpm_base_module_parse_port_name_t)(const char *port_name, - char **hnp_uri, char **rml_uri, - ompi_rml_tag_t *tag); - -/* - * Update the routed component to make sure that the RML can send messages to - * the remote port - */ -typedef int (*ompi_dpm_base_module_route_to_port_t)(char *rml_uri, ompi_process_name_t *rproc); - - -/* - * Close a port - */ -typedef int (*ompi_dpm_base_module_close_port_fn_t)(const char *port_name); - -/* - * Finalize a module - */ -typedef int (*ompi_dpm_base_module_finalize_fn_t)(void); - -/** -* Structure for DPM modules - */ -struct ompi_dpm_base_module_1_0_0_t { - /** Initialization Function */ - ompi_dpm_base_module_init_fn_t init; - /* connect/accept */ - ompi_dpm_base_module_connect_accept_fn_t connect_accept; - /* disconnect */ - ompi_dpm_base_module_disconnect_fn_t disconnect; - /* spawn processes */ - ompi_dpm_base_module_spawn_fn_t spawn; - /* dyn_init */ - ompi_dpm_base_module_dyn_init_fn_t dyn_init; - /* dyn_finalize */ - ompi_dpm_base_module_dyn_finalize_fn_t dyn_finalize; - /* mark dyncomm */ - ompi_dpm_base_module_mark_dyncomm_fn_t mark_dyncomm; - /* open port */ - ompi_dpm_base_module_open_port_fn_t open_port; - /* parse port string */ - ompi_dpm_base_module_parse_port_name_t parse_port; - /* update route to a port */ - ompi_dpm_base_module_route_to_port_t route_to_port; - /* close port */ - ompi_dpm_base_module_close_port_fn_t close_port; - /* finalize */ - ompi_dpm_base_module_finalize_fn_t finalize; - /* pconnect/accept */ - ompi_dpm_base_module_pconnect_fn_t pconnect; - ompi_dpm_base_module_paccept_fn_t paccept; - ompi_dpm_base_module_pclose_fn_t pclose; -}; -typedef struct ompi_dpm_base_module_1_0_0_t ompi_dpm_base_module_1_0_0_t; -typedef struct ompi_dpm_base_module_1_0_0_t ompi_dpm_base_module_t; - -OMPI_DECLSPEC extern ompi_dpm_base_module_t ompi_dpm; - - -/** - * Structure for DPM components. 
- */ -struct ompi_dpm_base_component_2_0_0_t { - /** MCA base component */ - mca_base_component_t base_version; - /** MCA base data */ - mca_base_component_data_t base_data; -}; -typedef struct ompi_dpm_base_component_2_0_0_t ompi_dpm_base_component_2_0_0_t; -typedef struct ompi_dpm_base_component_2_0_0_t ompi_dpm_base_component_t; - -/** - * Macro for use in components that are of type DPM - */ -#define OMPI_DPM_BASE_VERSION_2_0_0 \ - OMPI_MCA_BASE_VERSION_2_1_0("dpm", 2, 0, 0) - - -END_C_DECLS - -#endif /* OMPI_MCA_DPM_H */ diff --git a/ompi/mca/dpm/orte/Makefile.am b/ompi/mca/dpm/orte/Makefile.am deleted file mode 100644 index ef96097757..0000000000 --- a/ompi/mca/dpm/orte/Makefile.am +++ /dev/null @@ -1,43 +0,0 @@ -# -# Copyright (c) 2004-2006 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2009 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - - -dist_ompidata_DATA = help-ompi-dpm-orte.txt - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). 
- -if MCA_BUILD_ompi_dpm_orte_DSO -component_noinst = -component_install = mca_dpm_orte.la -else -component_noinst = libmca_dpm_orte.la -component_install = -endif - -local_sources = \ - dpm_orte.c \ - dpm_orte.h \ - dpm_orte_component.c - -mcacomponentdir = $(ompilibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_dpm_orte_la_SOURCES = $(local_sources) -mca_dpm_orte_la_LDFLAGS = -module -avoid-version $(dpm_orte_LDFLAGS) - -noinst_LTLIBRARIES = $(component_noinst) -libmca_dpm_orte_la_SOURCES = $(local_sources) -libmca_dpm_orte_la_LIBADD = $(dpm_orte_LIBS) -libmca_dpm_orte_la_LDFLAGS = -module -avoid-version $(dpm_orte_LDFLAGS) - diff --git a/ompi/mca/dpm/orte/configure.m4 b/ompi/mca/dpm/orte/configure.m4 deleted file mode 100644 index 8450f750cc..0000000000 --- a/ompi/mca/dpm/orte/configure.m4 +++ /dev/null @@ -1,25 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2011 Los Alamos National Security, LLC. -# All rights reserved. -# Copyright (c) 2014 Intel, Inc. All rights reserved -# -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# MCA_dpm_orte_CONFIG([action-if-found], [action-if-not-found]) -# ----------------------------------------------------------- -AC_DEFUN([MCA_ompi_dpm_orte_CONFIG],[ - AC_CONFIG_FILES([ompi/mca/dpm/orte/Makefile]) - - AC_ARG_WITH([orte], - AC_HELP_STRING([--with-orte], - [Use ORTE run-time environment (default: yes)])) - AS_IF([test "$with_orte" != "no"], - [$1], - [$2]) -]) diff --git a/ompi/mca/dpm/orte/dpm_orte.c b/ompi/mca/dpm/orte/dpm_orte.c deleted file mode 100644 index 463aa5872a..0000000000 --- a/ompi/mca/dpm/orte/dpm_orte.c +++ /dev/null @@ -1,1764 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2011 The University of Tennessee and The University - * of Tennessee Research Foundation. 
All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2007-2015 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2006-2009 University of Houston. All rights reserved. - * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. - * Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2013-2015 Intel, Inc. All rights reserved - * Copyright (c) 2014-2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" -#include "ompi/constants.h" - -#include -#include -#include -#include -#if HAVE_SYS_TIME_H -#include -#endif - -#include "opal/util/argv.h" -#include "opal/util/opal_getcwd.h" -#include "opal/dss/dss.h" -#include "opal/mca/dstore/dstore.h" -#include "opal/mca/hwloc/base/base.h" -#include "opal/mca/pmix/pmix.h" - -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/grpcomm/base/base.h" -#include "orte/mca/plm/base/base.h" -#include "orte/mca/rml/rml.h" -#include "orte/mca/rml/rml_types.h" -#include "orte/mca/rmaps/rmaps.h" -#include "orte/mca/rmaps/rmaps_types.h" -#include "orte/mca/rmaps/base/base.h" -#include "orte/mca/rml/base/rml_contact.h" -#include "orte/mca/routed/routed.h" -#include "orte/util/name_fns.h" -#include "orte/util/show_help.h" -#include "orte/runtime/orte_globals.h" -#include "orte/runtime/orte_wait.h" - -#include "ompi/communicator/communicator.h" -#include "ompi/group/group.h" -#include "ompi/proc/proc.h" -#include "ompi/mca/pml/pml.h" -#include "ompi/mca/rte/rte.h" -#include "ompi/info/info.h" - -#include "ompi/mca/dpm/base/base.h" -#include "dpm_orte.h" - -/* Local static variables */ -static opal_mutex_t 
ompi_dpm_port_mutex; -static orte_rml_tag_t next_tag; -static opal_list_t orte_dpm_acceptors, orte_dpm_connectors, dynamics; -static uint32_t next_preq=0; - -/* API functions */ -static int init(void); -static int connect_accept (ompi_communicator_t *comm, int root, - const char *port_string, bool send_first, - ompi_communicator_t **newcomm); -static int disconnect(ompi_communicator_t *comm); -static int spawn(int count, const char *array_of_commands[], - char **array_of_argv[], - const int array_of_maxprocs[], - const MPI_Info array_of_info[], - const char *port_name); -static int dyn_init(void); -static int open_port(char *port_name, orte_rml_tag_t given_tag); -static int parse_port_name(const char *port_name, char **hnp_uri, char **rml_uri, - orte_rml_tag_t *tag); -static int route_to_port(char *rml_uri, orte_process_name_t *rproc); -static int close_port(const char *port_name); -static int finalize(void); -static int dpm_pconnect(char *port, - struct timeval *timeout, - ompi_dpm_base_paccept_connect_callback_fn_t cbfunc, - void *cbdata); -static int dpm_paccept(char *port, - ompi_dpm_base_paccept_connect_callback_fn_t cbfunc, - void *cbdata); -static void dpm_pclose(char *port); - -/* - * instantiate the module - */ -ompi_dpm_base_module_t ompi_dpm_orte_module = { - init, - connect_accept, - disconnect, - spawn, - dyn_init, - ompi_dpm_base_dyn_finalize, - ompi_dpm_base_mark_dyncomm, - open_port, - parse_port_name, - route_to_port, - close_port, - finalize, - dpm_pconnect, - dpm_paccept, - dpm_pclose -}; - -typedef struct { - opal_list_item_t super; - opal_event_t ev; - bool event_active; - uint32_t id; - uint32_t cid; - orte_rml_tag_t tag; - ompi_dpm_base_paccept_connect_callback_fn_t cbfunc; - void *cbdata; -} orte_dpm_prequest_t; -OBJ_CLASS_INSTANCE(orte_dpm_prequest_t, - opal_list_item_t, - NULL, NULL); - - -static void connect_complete(int status, orte_process_name_t* sender, - opal_buffer_t* buffer, orte_rml_tag_t tag, - void* cbdata); - -/* - * Init the 
module - */ -static int init(void) -{ - OBJ_CONSTRUCT(&ompi_dpm_port_mutex, opal_mutex_t); - next_tag = OMPI_RML_TAG_DYNAMIC; - OBJ_CONSTRUCT(&orte_dpm_acceptors, opal_list_t); - OBJ_CONSTRUCT(&orte_dpm_connectors, opal_list_t); - OBJ_CONSTRUCT(&dynamics, opal_list_t); - - /* post a receive for pconnect request responses */ - orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, - OMPI_RML_PCONNECT_TAG, - ORTE_RML_PERSISTENT, - connect_complete, NULL); - - - return OMPI_SUCCESS; -} - -static int connect_accept(ompi_communicator_t *comm, int root, - const char *port_string, bool send_first, - ompi_communicator_t **newcomm) -{ - int size, rsize, rank, rc; - orte_std_cntr_t num_vals; - orte_std_cntr_t rnamebuflen = 0; - int rnamebuflen_int = 0; - void *rnamebuf=NULL; - - ompi_communicator_t *newcomp=MPI_COMM_NULL; - ompi_proc_t **rprocs=NULL; - ompi_group_t *group=comm->c_local_group; - orte_process_name_t port; - orte_rml_tag_t tag=ORTE_RML_TAG_INVALID; - opal_buffer_t *nbuf=NULL, *nrbuf=NULL; - ompi_proc_t **proc_list=NULL, **new_proc_list = NULL; - int32_t i,j, new_proc_len; - ompi_group_t *new_group_pointer; - - orte_namelist_t *nm; - orte_rml_recv_cb_t xfer; - orte_process_name_t carport; - - OPAL_OUTPUT_VERBOSE((1, ompi_dpm_base_framework.framework_output, - "%s dpm:orte:connect_accept with port %s %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - port_string, send_first ? 
"sending first" : "recv first")); - - /* set default error return */ - *newcomm = MPI_COMM_NULL; - - size = ompi_comm_size ( comm ); - rank = ompi_comm_rank ( comm ); - - /* extract the process name from the port string, if given, and - * set us up to communicate with it - */ - if (NULL != port_string && 0 < strlen(port_string)) { - char *hnp_uri, *rml_uri; - - /* separate the string into the HNP and RML URI and tag */ - if (ORTE_SUCCESS != (rc = parse_port_name(port_string, &hnp_uri, &rml_uri, &tag))) { - ORTE_ERROR_LOG(rc); - return rc; - } - /* extract the originating proc's name */ - if (ORTE_SUCCESS != (rc = orte_rml_base_parse_uris(rml_uri, &port, NULL))) { - ORTE_ERROR_LOG(rc); - free(hnp_uri); free(rml_uri); - return rc; - } - /* make sure we can route rml messages to the destination job */ - if (ORTE_SUCCESS != (rc = route_to_port(hnp_uri, &port))) { - ORTE_ERROR_LOG(rc); - free(hnp_uri); free(rml_uri); - return rc; - } - free(hnp_uri); free(rml_uri); - } - - if ( rank == root ) { - /* Generate the message buffer containing the number of processes and the list of - participating processes */ - nbuf = OBJ_NEW(opal_buffer_t); - if (NULL == nbuf) { - return OMPI_ERROR; - } - - if (OPAL_SUCCESS != (rc = opal_dss.pack(nbuf, &size, 1, OPAL_INT))) { - ORTE_ERROR_LOG(rc); - goto exit; - } - - if (OMPI_GROUP_IS_DENSE(group)) { - ompi_proc_pack(group->grp_proc_pointers, size, false, nbuf); - } else { - proc_list = (ompi_proc_t **) calloc (group->grp_proc_count, - sizeof (ompi_proc_t *)); - for (i=0 ; igrp_proc_count ; i++) { - if (NULL == (proc_list[i] = ompi_group_peer_lookup(group,i))) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - rc = ORTE_ERR_NOT_FOUND; - goto exit; - } - - OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_framework.framework_output, - "%s dpm:orte:connect_accept adding %s to proc list", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - OMPI_NAME_PRINT(&proc_list[i]->super.proc_name))); - } - ompi_proc_pack(proc_list, size, false, nbuf); - } - - /* pack wireup info - 
this is required so that all involved parties can - * discover how to talk to each other. For example, consider the case - * where we connect_accept to one independent job (B), and then connect_accept - * to another one (C) to wire all three of us together. Job B will not know - * how to talk to job C at the OOB level because the two of them didn't - * directly connect_accept to each other. Hence, we include the required - * wireup info at this first exchange - */ - if (ORTE_SUCCESS != (rc = orte_routed.get_wireup_info(nbuf))) { - ORTE_ERROR_LOG(rc); - goto exit; - } - - OBJ_CONSTRUCT(&xfer, orte_rml_recv_cb_t); - /* Exchange the number and the list of processes in the groups */ - if ( send_first ) { - OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_framework.framework_output, - "%s dpm:orte:connect_accept sending first to %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&port))); - rc = orte_rml.send_buffer_nb(&port, nbuf, tag, orte_rml_send_callback, NULL); - /* setup to recv */ - OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_framework.framework_output, - "%s dpm:orte:connect_accept waiting for response", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - xfer.active = true; - orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, tag, - ORTE_RML_NON_PERSISTENT, - orte_rml_recv_callback, &xfer); - /* wait for response */ - OMPI_WAIT_FOR_COMPLETION(xfer.active); - OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_framework.framework_output, - "%s dpm:orte:connect_accept got data from %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&xfer.name))); - - } else { - OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_framework.framework_output, - "%s dpm:orte:connect_accept recving first", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - /* setup to recv */ - xfer.active = true; - orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, tag, - ORTE_RML_NON_PERSISTENT, - orte_rml_recv_callback, &xfer); - /* wait for response */ - OMPI_WAIT_FOR_COMPLETION(xfer.active); - /* now send our info */ - OPAL_OUTPUT_VERBOSE((3, 
ompi_dpm_base_framework.framework_output, - "%s dpm:orte:connect_accept sending info to %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&xfer.name))); - rc = orte_rml.send_buffer_nb(&xfer.name, nbuf, tag, orte_rml_send_callback, NULL); - } - - if (OPAL_SUCCESS != (rc = opal_dss.unload(&xfer.data, &rnamebuf, &rnamebuflen))) { - ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(&xfer.data); - goto exit; - } - carport.jobid = xfer.name.jobid; - carport.vpid = xfer.name.vpid; - OBJ_DESTRUCT(&xfer); - } - - /* First convert the size_t to an int so we can cast in the bcast to a void * - * if we don't then we will get badness when using big vs little endian - * THIS IS NO LONGER REQUIRED AS THE LENGTH IS NOW A STD_CNTR_T, WHICH - * CORRELATES TO AN INT32 - */ - rnamebuflen_int = (int)rnamebuflen; - - /* bcast the buffer-length to all processes in the local comm */ - OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_framework.framework_output, - "%s dpm:orte:connect_accept bcast buffer length", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - rc = comm->c_coll.coll_bcast (&rnamebuflen_int, 1, MPI_INT, root, comm, - comm->c_coll.coll_bcast_module); - if ( OMPI_SUCCESS != rc ) { - goto exit; - } - rnamebuflen = rnamebuflen_int; - - if ( rank != root ) { - /* non root processes need to allocate the buffer manually */ - rnamebuf = (char *) malloc(rnamebuflen); - if ( NULL == rnamebuf ) { - rc = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - } - - /* bcast list of processes to all procs in local group - and reconstruct the data. Note that proc_get_proclist - adds processes, which were not known yet to our - process pool. 
- */ - OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_framework.framework_output, - "%s dpm:orte:connect_accept bcast proc list", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - rc = comm->c_coll.coll_bcast (rnamebuf, rnamebuflen_int, MPI_BYTE, root, comm, - comm->c_coll.coll_bcast_module); - if ( OMPI_SUCCESS != rc ) { - goto exit; - } - - nrbuf = OBJ_NEW(opal_buffer_t); - if (NULL == nrbuf) { - goto exit; - } - if ( OPAL_SUCCESS != ( rc = opal_dss.load(nrbuf, rnamebuf, rnamebuflen))) { - ORTE_ERROR_LOG(rc); - goto exit; - } - num_vals = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(nrbuf, &rsize, &num_vals, OPAL_INT))) { - ORTE_ERROR_LOG(rc); - goto exit; - } - rc = ompi_proc_unpack(nrbuf, rsize, &rprocs, false, &new_proc_len, &new_proc_list); - if ( OMPI_SUCCESS != rc ) { - goto exit; - } - - OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_framework.framework_output, - "%s dpm:orte:connect_accept unpacked %d new procs", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), new_proc_len)); - - /* If we added new procs, we need to do the modex and then call - PML add_procs */ - if (new_proc_len > 0) { - opal_list_t all_procs; - orte_namelist_t *name; - opal_process_name_t *ids; - opal_list_t myvals; - opal_value_t *kv; - - /* we first need to give the wireup info to our routed module. 
- * Not every routed module will need it, but some do require - * this info before we can do any comm - */ - if (ORTE_SUCCESS != (rc = orte_routed.init_routes(OMPI_CAST_RTE_NAME(&rprocs[0]->super.proc_name)->jobid, nrbuf))) { - ORTE_ERROR_LOG(rc); - goto exit; - } - - OBJ_CONSTRUCT(&all_procs, opal_list_t); - - if (send_first) { - for (i = 0 ; i < rsize ; ++i) { - name = OBJ_NEW(orte_namelist_t); - name->name = *OMPI_CAST_RTE_NAME(&rprocs[i]->super.proc_name); - opal_list_append(&all_procs, &name->super); - OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_framework.framework_output, - "%s dpm:orte:connect_accept send first adding %s to allgather list", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&name->name))); - } - for (i = 0 ; i < group->grp_proc_count ; ++i) { - name = OBJ_NEW(orte_namelist_t); - name->name = *OMPI_CAST_RTE_NAME(&(ompi_group_peer_lookup(group, i)->super.proc_name)); - opal_list_append(&all_procs, &name->super); - OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_framework.framework_output, - "%s dpm:orte:connect_accept send first adding %s to allgather list", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&name->name))); - } - - } else { - for (i = 0 ; i < group->grp_proc_count ; ++i) { - name = OBJ_NEW(orte_namelist_t); - name->name = *OMPI_CAST_RTE_NAME(&(ompi_group_peer_lookup(group, i)->super.proc_name)); - opal_list_append(&all_procs, &name->super); - OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_framework.framework_output, - "%s dpm:orte:connect_accept recv first adding %s to allgather list", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&name->name))); - } - for (i = 0 ; i < rsize ; ++i) { - name = OBJ_NEW(orte_namelist_t); - name->name = *OMPI_CAST_RTE_NAME(&rprocs[i]->super.proc_name); - opal_list_append(&all_procs, &name->super); - OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_framework.framework_output, - "%s dpm:orte:connect_accept recv first adding %s to allgather list", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - 
ORTE_NAME_PRINT(&name->name))); - } - - } - - OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_framework.framework_output, - "%s dpm:orte:connect_accept executing modex", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - - /* setup the modex */ - ids = (opal_process_name_t*)malloc(opal_list_get_size(&all_procs) * sizeof(opal_process_name_t)); - /* copy across the list of participants */ - i=0; - OPAL_LIST_FOREACH(nm, &all_procs, orte_namelist_t) { - ids[i++] = nm->name; - } - OPAL_LIST_DESTRUCT(&all_procs); - /* perform it */ - opal_pmix.fence(ids, i); - free(ids); - - OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_framework.framework_output, - "%s dpm:orte:connect_accept adding procs", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - - /* set the locality of the new procs - the required info should - * have been included in the data exchange */ - for (j=0; j < new_proc_len; j++) { - OBJ_CONSTRUCT(&myvals, opal_list_t); - if (OMPI_SUCCESS != (rc = opal_dstore.fetch(opal_dstore_internal, - &new_proc_list[j]->super.proc_name, - OPAL_DSTORE_LOCALITY, &myvals))) { - new_proc_list[j]->super.proc_flags = OPAL_PROC_NON_LOCAL; - } else { - kv = (opal_value_t*)opal_list_get_first(&myvals); - new_proc_list[j]->super.proc_flags = kv->data.uint16; - } - OPAL_LIST_DESTRUCT(&myvals); - } - - if (OMPI_SUCCESS != (rc = MCA_PML_CALL(add_procs(new_proc_list, new_proc_len)))) { - ORTE_ERROR_LOG(rc); - goto exit; - } - - OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_framework.framework_output, - "%s dpm:orte:connect_accept new procs added", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - - } - - OBJ_RELEASE(nrbuf); - - OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_framework.framework_output, - "%s dpm:orte:connect_accept allocating group size %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), rsize)); - - new_group_pointer=ompi_group_allocate(rsize); - if( NULL == new_group_pointer ) { - rc = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - - /* put group elements in the list */ - for (j = 0; j < rsize; j++) { - new_group_pointer->grp_proc_pointers[j] 
= rprocs[j]; - } /* end proc loop */ - - /* increment proc reference counters */ - ompi_group_increment_proc_count(new_group_pointer); - - OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_framework.framework_output, - "%s dpm:orte:connect_accept setting up communicator", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - - /* set up communicator structure */ - rc = ompi_comm_set ( &newcomp, /* new comm */ - comm, /* old comm */ - group->grp_proc_count, /* local_size */ - NULL, /* local_procs */ - rsize, /* remote_size */ - NULL , /* remote_procs */ - NULL, /* attrs */ - comm->error_handler, /* error handler */ - NULL, /* topo component */ - group, /* local group */ - new_group_pointer /* remote group */ - ); - if ( NULL == newcomp ) { - rc = OMPI_ERR_OUT_OF_RESOURCE; - goto exit; - } - - ompi_group_decrement_proc_count (new_group_pointer); - OBJ_RELEASE(new_group_pointer); - new_group_pointer = MPI_GROUP_NULL; - - OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_framework.framework_output, - "%s dpm:orte:connect_accept allocate comm_cid", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - - /* allocate comm_cid */ - rc = ompi_comm_nextcid ( newcomp, /* new communicator */ - comm, /* old communicator */ - NULL, /* bridge comm */ - &root, /* local leader */ - &carport, /* remote leader */ - OMPI_COMM_CID_INTRA_OOB, /* mode */ - send_first ); /* send or recv first */ - if ( OMPI_SUCCESS != rc ) { - goto exit; - } - - OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_framework.framework_output, - "%s dpm:orte:connect_accept activate comm", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - - /* activate comm and init coll-component */ - rc = ompi_comm_activate ( &newcomp, /* new communicator */ - comm, /* old communicator */ - NULL, /* bridge comm */ - &root, /* local leader */ - &carport, /* remote leader */ - OMPI_COMM_CID_INTRA_OOB, /* mode */ - send_first ); /* send or recv first */ - if ( OMPI_SUCCESS != rc ) { - goto exit; - } - - /* Question: do we have to re-start some low level stuff - to enable the usage of fast 
communication devices - between the two worlds ? - */ - - exit: - if ( NULL != rprocs ) { - free ( rprocs ); - } - if ( NULL != proc_list ) { - free ( proc_list ); - } - if ( NULL != new_proc_list ) { - free ( new_proc_list ); - } - if ( OMPI_SUCCESS != rc ) { - if ( MPI_COMM_NULL != newcomp && NULL != newcomp ) { - OBJ_RETAIN(newcomp); - newcomp = MPI_COMM_NULL; - } - } - - *newcomm = newcomp; - OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_framework.framework_output, - "%s dpm:orte:connect_accept complete", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - - return rc; -} - -static int construct_peers(ompi_group_t *group, opal_list_t *peers) -{ - int i; - orte_namelist_t *nm, *n2; - ompi_proc_t *proct; - - if (OMPI_GROUP_IS_DENSE(group)) { - OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_framework.framework_output, - "%s dpm:orte:disconnect group is dense", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - for (i=0; i < group->grp_proc_count; i++) { - if (NULL == (proct = group->grp_proc_pointers[i])) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - return ORTE_ERR_NOT_FOUND; - } - /* add to the list of peers */ - OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_framework.framework_output, - "%s dpm:orte:disconnect adding participant %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT((const orte_process_name_t *)&proct->super.proc_name))); - nm = OBJ_NEW(orte_namelist_t); - nm->name = *(orte_process_name_t*)&proct->super.proc_name; - /* need to maintain an ordered list to ensure the tracker signatures - * match across all procs */ - OPAL_LIST_FOREACH(n2, peers, orte_namelist_t) { - if (opal_compare_proc(nm->name, n2->name) < 0) { - opal_list_insert_pos(peers, &n2->super, &nm->super); - nm = NULL; - break; - } - } - if (NULL != nm) { - /* append to the end */ - opal_list_append(peers, &nm->super); - } - } - } else { - for (i=0; i < group->grp_proc_count; i++) { - /* lookup this proc_t to get the process name */ - if (NULL == (proct = ompi_group_peer_lookup(group, i))) { - 
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - return ORTE_ERR_NOT_FOUND; - } - /* add to the list of peers */ - OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_framework.framework_output, - "%s dpm:orte:disconnect adding participant %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT((const orte_process_name_t *)&proct->super.proc_name))); - nm = OBJ_NEW(orte_namelist_t); - nm->name = *(orte_process_name_t*)&proct->super.proc_name; - /* need to maintain an ordered list to ensure the tracker signatures - * match across all procs */ - OPAL_LIST_FOREACH(n2, peers, orte_namelist_t) { - if (opal_compare_proc(nm->name, n2->name) < 0) { - opal_list_insert_pos(peers, &n2->super, &nm->super); - nm = NULL; - break; - } - } - if (NULL != nm) { - /* append to the end */ - opal_list_append(peers, &nm->super); - } - } - } - return ORTE_SUCCESS; -} - -static int disconnect(ompi_communicator_t *comm) -{ - int ret, i; - ompi_group_t *group; - opal_list_t coll; - orte_namelist_t *nm; - opal_process_name_t *ids; - - /* Note that we explicitly use an RTE-based barrier (vs. an MPI - barrier). See a lengthy comment in - ompi/runtime/ompi_mpi_finalize.c for a much more detailed - rationale. 
*/ - - OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_framework.framework_output, - "%s dpm:orte:disconnect comm_cid %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), comm->c_contextid)); - - /* setup the collective */ - OBJ_CONSTRUCT(&coll, opal_list_t); - /* RHC: assuming for now that this must flow across all - * local and remote group members */ - group = comm->c_local_group; - if (ORTE_SUCCESS != (ret = construct_peers(group, &coll))) { - ORTE_ERROR_LOG(ret); - OPAL_LIST_DESTRUCT(&coll); - return ret; - } - /* do the same for the remote group */ - group = comm->c_remote_group; - if (ORTE_SUCCESS != (ret = construct_peers(group, &coll))) { - ORTE_ERROR_LOG(ret); - OPAL_LIST_DESTRUCT(&coll); - return ret; - } - - /* setup the ids */ - ids = (opal_process_name_t*)malloc(opal_list_get_size(&coll) * sizeof(opal_process_name_t)); - i=0; - OPAL_LIST_FOREACH(nm, &coll, orte_namelist_t) { - ids[i++] = nm->name; - } - OPAL_LIST_DESTRUCT(&coll); - - OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_framework.framework_output, - "%s dpm:orte:disconnect calling barrier on comm_cid %d with %d participants", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), comm->c_contextid, i)); - opal_pmix.fence(ids, i); - free(ids); - - OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_framework.framework_output, - "%s dpm:orte:disconnect barrier complete for comm_cid %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), comm->c_contextid)); - - return OMPI_SUCCESS; -} - -static int spawn(int count, const char *array_of_commands[], - char **array_of_argv[], - const int array_of_maxprocs[], - const MPI_Info array_of_info[], - const char *port_name) -{ - int rc, i, j, counter; - int have_wdir=0; - int flag=0; - char cwd[OPAL_PATH_MAX]; - char host[OPAL_MAX_INFO_VAL]; /*** should define OMPI_HOST_MAX ***/ - char prefix[OPAL_MAX_INFO_VAL]; - char stdin_target[OPAL_MAX_INFO_VAL]; - char params[OPAL_MAX_INFO_VAL]; - char mapper[OPAL_MAX_INFO_VAL]; - int npernode; - char slot_list[OPAL_MAX_INFO_VAL]; - - orte_job_t *jdata; - orte_app_context_t *app; - bool 
local_spawn, non_mpi; - char **envars; - - /* parse the info object */ - /* check potentially for: - - "host": desired host where to spawn the processes - - "hostfile": hostfile containing hosts where procs are - to be spawned - - "add-host": add the specified hosts to the known list - of available resources and spawn these - procs on them - - "add-hostfile": add the hosts in the hostfile to the - known list of available resources and spawn - these procs on them - - "env": a newline-delimited list of envar values to be - placed into the app's environment (of form "foo=bar") - - "ompi_prefix": the path to the root of the directory tree where ompi - executables and libraries can be found on all nodes - used to spawn these procs - - "arch": desired architecture - - "wdir": directory, where executable can be found - - "path": list of directories where to look for the executable - - "file": filename, where additional information is provided. - - "soft": see page 92 of MPI-2. - - "mapper": indicate the mapper to be used for the job - - "display_map": display the map of the spawned job - - "npernode": number of procs/node to spawn - - "pernode": spawn one proc/node - - "ppr": spawn specified number of procs per specified object - - "map_by": specify object by which the procs should be mapped - - "rank_by": specify object by which the procs should be ranked - - "bind_to": specify object to which the procs should be bound - - "ompi_preload_binary": move binaries to nodes prior to execution - - "ompi_preload_files": move specified files to nodes prior to execution - - "ompi_non_mpi": spawned job will not call MPI_Init - - "ompi_param": list of MCA params to be in the spawned job's environment - - "env": newline (\n) delimited list of envar values to be passed to spawned procs - */ - - /* setup the job object */ - jdata = OBJ_NEW(orte_job_t); - - /* Convert the list of commands to an array of orte_app_context_t - pointers */ - for (i = 0; i < count; ++i) { - app = 
OBJ_NEW(orte_app_context_t); - if (NULL == app) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - OBJ_RELEASE(jdata); - opal_progress_event_users_decrement(); - return ORTE_ERR_OUT_OF_RESOURCE; - } - /* add the app to the job data */ - opal_pointer_array_add(jdata->apps, app); - app->idx = i; - jdata->num_apps++; - - /* copy over the name of the executable */ - app->app = strdup(array_of_commands[i]); - if (NULL == app->app) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - OBJ_RELEASE(jdata); - opal_progress_event_users_decrement(); - return ORTE_ERR_OUT_OF_RESOURCE; - } - /* record the number of procs to be generated */ - app->num_procs = array_of_maxprocs[i]; - - /* copy over the argv array */ - counter = 1; - - if (MPI_ARGVS_NULL != array_of_argv && - MPI_ARGV_NULL != array_of_argv[i]) { - /* first need to find out how many entries there are */ - j=0; - while (NULL != array_of_argv[i][j]) { - j++; - } - counter += j; - } - - /* now copy them over, ensuring to NULL terminate the array */ - app->argv = (char**)malloc((1 + counter) * sizeof(char*)); - if (NULL == app->argv) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - OBJ_RELEASE(jdata); - opal_progress_event_users_decrement(); - return ORTE_ERR_OUT_OF_RESOURCE; - } - app->argv[0] = strdup(array_of_commands[i]); - for (j=1; j < counter; j++) { - app->argv[j] = strdup(array_of_argv[i][j-1]); - } - app->argv[counter] = NULL; - - - /* the environment gets set by the launcher - * all we need to do is add the specific values - * needed for comm_spawn - */ - /* Add environment variable with the contact information for the - child processes. 
- */ - app->env = (char**)malloc(2 * sizeof(char*)); - if (NULL == app->env) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - OBJ_RELEASE(jdata); - opal_progress_event_users_decrement(); - return ORTE_ERR_OUT_OF_RESOURCE; - } - asprintf(&(app->env[0]), "OMPI_PARENT_PORT=%s", port_name); - app->env[1] = NULL; - for (j = 0; NULL != environ[j]; ++j) { - if (0 == strncmp("OMPI_", environ[j], 5)) { - opal_argv_append_nosize(&app->env, environ[j]); - } - } - - /* Check for well-known info keys */ - have_wdir = 0; - if ( array_of_info != NULL && array_of_info[i] != MPI_INFO_NULL ) { - - /* check for personality */ - ompi_info_get (array_of_info[i], "personality", sizeof(host) - 1, host, &flag); - if ( flag ) { - jdata->personality = strdup(host); - } - - /* check for 'host' */ - ompi_info_get (array_of_info[i], "host", sizeof(host) - 1, host, &flag); - if ( flag ) { - orte_set_attribute(&app->attributes, ORTE_APP_DASH_HOST, false, host, OPAL_STRING); - } - - /* check for 'hostfile' */ - ompi_info_get (array_of_info[i], "hostfile", sizeof(host) - 1, host, &flag); - if ( flag ) { - orte_set_attribute(&app->attributes, ORTE_APP_HOSTFILE, false, host, OPAL_STRING); - } - - /* check for 'add-hostfile' */ - ompi_info_get (array_of_info[i], "add-hostfile", sizeof(host) - 1, host, &flag); - if ( flag ) { - orte_set_attribute(&app->attributes, ORTE_APP_ADD_HOSTFILE, false, host, OPAL_STRING); - } - - /* check for 'add-host' */ - ompi_info_get (array_of_info[i], "add-host", sizeof(host) - 1, host, &flag); - if ( flag ) { - orte_set_attribute(&app->attributes, ORTE_APP_ADD_HOST, false, host, OPAL_STRING); - } - - /* check for env */ - ompi_info_get (array_of_info[i], "env", sizeof(host)-1, host, &flag); - if ( flag ) { - envars = opal_argv_split(host, '\n'); - for (j=0; NULL != envars[j]; j++) { - opal_argv_append_nosize(&app->env, envars[j]); - } - opal_argv_free(envars); - } - - /* 'path', 'arch', 'file', 'soft' -- to be implemented */ - - /* check for 'ompi_prefix' (OMPI-specific 
-- to effect the same - * behavior as --prefix option to orterun) - */ - ompi_info_get (array_of_info[i], "ompi_prefix", sizeof(prefix) - 1, prefix, &flag); - if ( flag ) { - orte_set_attribute(&app->attributes, ORTE_APP_PREFIX_DIR, false, prefix, OPAL_STRING); - } - - /* check for 'wdir' */ - ompi_info_get (array_of_info[i], "wdir", sizeof(cwd) - 1, cwd, &flag); - if ( flag ) { - app->cwd = strdup(cwd); - have_wdir = 1; - } - - /* check for 'mapper' */ - ompi_info_get(array_of_info[i], "mapper", sizeof(mapper) - 1, mapper, &flag); - if ( flag ) { - if (NULL == jdata->map) { - jdata->map = OBJ_NEW(orte_job_map_t); - if (NULL == jdata->map) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - } - jdata->map->req_mapper = strdup(mapper); - } - - /* check for 'display_map' */ - ompi_info_get_bool(array_of_info[i], "display_map", &local_spawn, &flag); - if ( flag ) { - if (NULL == jdata->map) { - jdata->map = OBJ_NEW(orte_job_map_t); - if (NULL == jdata->map) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - } - jdata->map->display_map = true; - } - - /* check for 'npernode' and 'ppr' */ - ompi_info_get (array_of_info[i], "npernode", sizeof(slot_list) - 1, slot_list, &flag); - if ( flag ) { - if (ORTE_SUCCESS != ompi_info_value_to_int(slot_list, &npernode)) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return ORTE_ERR_BAD_PARAM; - } - if (NULL == jdata->map) { - jdata->map = OBJ_NEW(orte_job_map_t); - if (NULL == jdata->map) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - } - if (ORTE_MAPPING_POLICY_IS_SET(jdata->map->mapping)) { - /* not allowed to provide multiple mapping policies */ - return OMPI_ERROR; - } - ORTE_SET_MAPPING_DIRECTIVE(jdata->map->mapping, ORTE_MAPPING_PPR); - asprintf(&(jdata->map->ppr), "%d:n", npernode); - } - ompi_info_get (array_of_info[i], "pernode", sizeof(slot_list) - 1, slot_list, &flag); - if ( flag ) { - if (NULL == jdata->map) { - 
jdata->map = OBJ_NEW(orte_job_map_t); - if (NULL == jdata->map) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - } - if (ORTE_MAPPING_POLICY_IS_SET(jdata->map->mapping)) { - /* not allowed to provide multiple mapping policies */ - return OMPI_ERROR; - } - ORTE_SET_MAPPING_DIRECTIVE(jdata->map->mapping, ORTE_MAPPING_PPR); - jdata->map->ppr = strdup("1:n"); - } - ompi_info_get (array_of_info[i], "ppr", sizeof(slot_list) - 1, slot_list, &flag); - if ( flag ) { - if (NULL == jdata->map) { - jdata->map = OBJ_NEW(orte_job_map_t); - if (NULL == jdata->map) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - } - if (ORTE_MAPPING_POLICY_IS_SET(jdata->map->mapping)) { - /* not allowed to provide multiple mapping policies */ - return OMPI_ERROR; - } - ORTE_SET_MAPPING_DIRECTIVE(jdata->map->mapping, ORTE_MAPPING_PPR); - jdata->map->ppr = strdup(slot_list); - } - - /* check for 'map_by' */ - ompi_info_get(array_of_info[i], "map_by", sizeof(slot_list) - 1, slot_list, &flag); - if ( flag ) { - if (NULL == jdata->map) { - jdata->map = OBJ_NEW(orte_job_map_t); - if (NULL == jdata->map) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - } - if (ORTE_MAPPING_POLICY_IS_SET(jdata->map->mapping)) { - /* not allowed to provide multiple mapping policies */ - return OMPI_ERROR; - } - if (ORTE_SUCCESS != (rc = orte_rmaps_base_set_mapping_policy(&jdata->map->mapping, - NULL, slot_list))) { - return rc; - } - } - - /* check for 'rank_by' */ - ompi_info_get(array_of_info[i], "rank_by", sizeof(slot_list) - 1, slot_list, &flag); - if ( flag ) { - if (NULL == jdata->map) { - jdata->map = OBJ_NEW(orte_job_map_t); - if (NULL == jdata->map) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - } - if (ORTE_RANKING_POLICY_IS_SET(jdata->map->ranking)) { - /* not allowed to provide multiple ranking policies */ - return OMPI_ERROR; - } - if (ORTE_SUCCESS != (rc = 
orte_rmaps_base_set_ranking_policy(&jdata->map->ranking, - jdata->map->mapping, slot_list))) { - return rc; - } - } - -#if OPAL_HAVE_HWLOC - /* check for 'bind_to' */ - ompi_info_get(array_of_info[i], "bind_to", sizeof(slot_list) - 1, slot_list, &flag); - if ( flag ) { - if (NULL == jdata->map) { - jdata->map = OBJ_NEW(orte_job_map_t); - if (NULL == jdata->map) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - } - if (OPAL_BINDING_POLICY_IS_SET(jdata->map->binding)) { - /* not allowed to provide multiple binding policies */ - return OMPI_ERROR; - } - if (ORTE_SUCCESS != (rc = opal_hwloc_base_set_binding_policy(&jdata->map->binding, slot_list))) { - return rc; - } - } -#endif - - /* check for 'preload_binary' */ - ompi_info_get_bool(array_of_info[i], "ompi_preload_binary", &local_spawn, &flag); - if ( flag ) { - orte_set_attribute(&app->attributes, ORTE_APP_PRELOAD_BIN, false, NULL, OPAL_BOOL); - } - - /* check for 'preload_files' */ - ompi_info_get (array_of_info[i], "ompi_preload_files", sizeof(cwd) - 1, cwd, &flag); - if ( flag ) { - orte_set_attribute(&app->attributes, ORTE_APP_PRELOAD_FILES, false, cwd, OPAL_STRING); - } - - /* see if this is a non-mpi job - if so, then set the flag so ORTE - * knows what to do - */ - ompi_info_get_bool(array_of_info[i], "ompi_non_mpi", &non_mpi, &flag); - if (flag && non_mpi) { - orte_set_attribute(&jdata->attributes, ORTE_JOB_NON_ORTE_JOB, false, NULL, OPAL_BOOL); - } - - /* see if this is an MCA param that the user wants applied to the child job */ - ompi_info_get (array_of_info[i], "ompi_param", sizeof(params) - 1, params, &flag); - if ( flag ) { - opal_argv_append_unique_nosize(&app->env, params, true); - } - - /* see if user specified what to do with stdin - defaults to - * not forwarding stdin to child processes - */ - ompi_info_get (array_of_info[i], "ompi_stdin_target", sizeof(stdin_target) - 1, stdin_target, &flag); - if ( flag ) { - if (0 == strcmp(stdin_target, "all")) { - 
jdata->stdin_target = ORTE_VPID_WILDCARD; - } else if (0 == strcmp(stdin_target, "none")) { - jdata->stdin_target = ORTE_VPID_INVALID; - } else { - jdata->stdin_target = strtoul(stdin_target, NULL, 10); - } - } - } - - /* default value: If the user did not tell us where to look for the - * executable, we assume the current working directory - */ - if ( !have_wdir ) { - if (OMPI_SUCCESS != (rc = opal_getcwd(cwd, OPAL_PATH_MAX))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(jdata); - opal_progress_event_users_decrement(); - return rc; - } - app->cwd = strdup(cwd); - } - - /* leave the map info alone - the launcher will - * decide where to put things - */ - } /* for (i = 0 ; i < count ; ++i) */ - - /* default the personality */ - if (NULL == jdata->personality) { - jdata->personality = strdup("ompi"); - } - - /* spawn procs */ - rc = orte_plm.spawn(jdata); - OBJ_RELEASE(jdata); - - if (ORTE_SUCCESS != rc) { - ORTE_ERROR_LOG(rc); - opal_progress_event_users_decrement(); - return MPI_ERR_SPAWN; - } - - return OMPI_SUCCESS; -} - -/* - * The port_name is constructed to support the ability - * to route messages between different jobs. Messages - * between job families are routed via their respective HNPs - * to reduce connection count and to support connect/accept. - * Thus, the port_name consists of three fields: - * (a) the contact info of the process opening the port. This - * is provided in case the routed module wants to communicate - * directly between the procs. - * (b) the tag of the port. The reason for adding the tag is - * to make the port unique for multi-threaded scenarios. - * (c) the contact info for the job's HNP. This will be - * used to route messages between job families - * - * Construction of the port name is done here - as opposed to - * in the routed module itself - because two mpiruns using different - * routed modules could exchange the port name (via pubsub). The - * format of the port name must, therefore, be universal. 
- * - * Optionally can provide a tag to be used - otherwise, we supply the - * next dynamically assigned tag - */ -static int open_port(char *port_name, orte_rml_tag_t given_tag) -{ - char *rml_uri=NULL; - int rc, len; - char tag[12]; - - /* if we are a singleton and the supporting HNP hasn't - * been spawned, then do so now - */ - if ((orte_process_info.proc_type & ORTE_PROC_SINGLETON) && - !orte_routing_is_enabled) { - if (ORTE_SUCCESS != (rc = orte_plm_base_fork_hnp())) { - ORTE_ERROR_LOG(rc); - return OMPI_ERROR; - } - orte_routing_is_enabled = true; - /* need to init_routes again to redirect messages - * thru the HNP - */ - orte_routed.init_routes(ORTE_PROC_MY_NAME->jobid, NULL); - } - - if (NULL == orte_process_info.my_hnp_uri) { - rc = OMPI_ERR_NOT_AVAILABLE; - ORTE_ERROR_LOG(rc); - goto cleanup; - } - - if (NULL == (rml_uri = orte_rml.get_contact_info())) { - rc = OMPI_ERROR; - ORTE_ERROR_LOG(rc); - goto cleanup; - } - - if (ORTE_RML_TAG_INVALID == given_tag) { - OPAL_THREAD_LOCK(&ompi_dpm_port_mutex); - snprintf(tag, 12, "%d", next_tag); - next_tag++; - OPAL_THREAD_UNLOCK(&ompi_dpm_port_mutex); - } else { - snprintf(tag, 12, "%d", given_tag); - } - - - len = strlen(orte_process_info.my_hnp_uri) + strlen(rml_uri) + strlen(tag); - - /* if the overall port name is too long, we abort */ - if (len > (MPI_MAX_PORT_NAME-1)) { - rc = OMPI_ERR_VALUE_OUT_OF_BOUNDS; - goto cleanup; - } - - /* assemble the port name */ - snprintf(port_name, MPI_MAX_PORT_NAME, "%s+%s:%s", orte_process_info.my_hnp_uri, rml_uri, tag); - rc = OMPI_SUCCESS; - -cleanup: - if (NULL != rml_uri) { - free(rml_uri); - } - - return rc; -} - - -static int route_to_port(char *rml_uri, orte_process_name_t *rproc) -{ - opal_buffer_t route; - int rc; - - /* We need to ask the routed module to init_routes so it can do the - * right thing. 
In most cases, it will route any messages to the - * proc through our HNP - however, this is NOT the case in all - * circumstances, so we need to let the routed module decide what - * to do. - */ - /* pack a cmd so the buffer can be unpacked correctly */ - OBJ_CONSTRUCT(&route, opal_buffer_t); - - /* pack the provided uri */ - opal_dss.pack(&route, &rml_uri, 1, OPAL_STRING); - - /* init the route */ - if (ORTE_SUCCESS != (rc = orte_routed.init_routes(rproc->jobid, &route))) { - ORTE_ERROR_LOG(rc); - } - OBJ_DESTRUCT(&route); - - /* nothing more to do here */ - return rc; -} - -static int parse_port_name(const char *port_name, - char **hnp_uri, - char **rml_uri, - orte_rml_tag_t *ptag) -{ - char *tmpstring=NULL, *ptr; - int tag; - int rc; - - /* don't mangle the port name */ - tmpstring = strdup(port_name); - if (NULL == tmpstring) { - return OMPI_ERR_OUT_OF_RESOURCE; - } - - /* find the ':' demarking the RML tag we added to the end */ - if (NULL == (ptr = strrchr(tmpstring, ':'))) { - rc = OMPI_ERR_NOT_FOUND; - goto cleanup; - } - - /* terminate the port_name at that location */ - *ptr = '\0'; - ptr++; - - /* convert the RML tag */ - (void)sscanf(ptr,"%d", &tag); - - /* now split out the second field - the uri of the remote proc */ - if (NULL == (ptr = strchr(tmpstring, '+'))) { - rc = OMPI_ERR_NOT_FOUND; - goto cleanup; - } - *ptr = '\0'; - ptr++; - - /* save that info */ - if(NULL != hnp_uri) *hnp_uri = tmpstring; - else free(tmpstring); - if(NULL != rml_uri) *rml_uri = strdup(ptr); - if(NULL != ptag) *ptag = tag; - - return OMPI_SUCCESS; - -cleanup: - /* release the tmp storage */ - free(tmpstring); - return rc; -} - -static int close_port(const char *port_name) -{ - /* nothing to do here - user is responsible for the memory */ - return OMPI_SUCCESS; -} - -static int dyn_init(void) -{ - char *port_name=NULL; - int root=0, rc; - bool send_first = true; - ompi_communicator_t *newcomm=NULL; - - /* if env-variable is set, we are a dynamically spawned - * child - 
parse port and call comm_connect_accept */ - if (NULL == (port_name = ompi_dpm_base_dyn_init())) { - /* nothing to do */ - return OMPI_SUCCESS; - } - - OPAL_OUTPUT_VERBOSE((1, ompi_dpm_base_framework.framework_output, - "%s dpm:orte:dyn_init with port %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - port_name)); - - rc = connect_accept (MPI_COMM_WORLD, root, port_name, send_first, &newcomm); - free(port_name); - if (OMPI_SUCCESS != rc) { - return rc; - } - - /* originally, we set comm_parent to comm_null (in comm_init), - * now we have to decrease the reference counters to the according - * objects - */ - OBJ_RELEASE(ompi_mpi_comm_parent->c_local_group); - OBJ_RELEASE(ompi_mpi_comm_parent->error_handler); - OBJ_RELEASE(ompi_mpi_comm_parent); - - /* Set the parent communicator */ - ompi_mpi_comm_parent = newcomm; - - /* Set name for debugging purposes */ - snprintf(newcomm->c_name, MPI_MAX_OBJECT_NAME, "MPI_COMM_PARENT"); - newcomm->c_flags |= OMPI_COMM_NAMEISSET; - - return OMPI_SUCCESS; -} - - -/* - * finalize the module - */ -static int finalize(void) -{ - OBJ_DESTRUCT(&ompi_dpm_port_mutex); - OPAL_LIST_DESTRUCT(&orte_dpm_acceptors); - OPAL_LIST_DESTRUCT(&orte_dpm_connectors); - return OMPI_SUCCESS; -} - -static void timeout_cb(int fd, short args, void *cbdata) -{ - orte_dpm_prequest_t *req = (orte_dpm_prequest_t*)cbdata; - - /* remove the request from the list */ - OPAL_THREAD_LOCK(&ompi_dpm_port_mutex); - opal_list_remove_item(&orte_dpm_connectors, &req->super); - OPAL_THREAD_UNLOCK(&ompi_dpm_port_mutex); - - /* this connection request failed - notify the caller */ - req->cbfunc(MPI_COMM_NULL, NULL, req->cbdata); - - /* cleanup */ - OBJ_RELEASE(req); -} - -static void process_request(orte_process_name_t* sender, - opal_buffer_t *buffer, - bool connector, - ompi_communicator_t **newcomm, - ompi_proc_t **proct) -{ - ompi_communicator_t *newcomp=MPI_COMM_NULL; - ompi_group_t *group=MPI_COMM_SELF->c_local_group; - ompi_group_t *new_group_pointer; - ompi_proc_t 
**rprocs=NULL; - ompi_proc_t **new_proc_list=NULL; - int new_proc_len; - opal_buffer_t *xfer; - int cnt, rc; - uint32_t id; - - OPAL_OUTPUT_VERBOSE((2, ompi_dpm_base_framework.framework_output, - "%s dpm:pconprocess: PROCESS REQUEST: %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - connector ? "connector" : "acceptor")); - - /* if we are the acceptor, unpack the remote peer's request id */ - if (!connector) { - cnt=1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &id, &cnt, OPAL_UINT32))) { - ORTE_ERROR_LOG(rc); - goto cleanup; - } - OPAL_OUTPUT_VERBOSE((2, ompi_dpm_base_framework.framework_output, - "%s dpm:pconprocess: PROCESS REQUEST ID: %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), id)); - } - - /* unpack the proc info */ - if (OMPI_SUCCESS != (rc = ompi_proc_unpack(buffer, 1, &rprocs, false, &new_proc_len, &new_proc_list))) { - ORTE_ERROR_LOG(rc); - return; - } - - /* If we added new procs, we need to unpack the modex info - * and then call PML add_procs - */ - if (0 < new_proc_len) { - OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_framework.framework_output, - "%s dpm:pconprocess: process modex", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - - OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_framework.framework_output, - "%s dpm:pconprocess: adding procs", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - if (OMPI_SUCCESS != (rc = MCA_PML_CALL(add_procs(new_proc_list, new_proc_len)))) { - ORTE_ERROR_LOG(rc); - goto cleanup; - } - - OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_framework.framework_output, - "%s dpm:orte:pconnect new procs added", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - - } - - /* if we are the acceptor, we now have to send the requestor our - * info so we can collaborate on setup of the communicator - we - * must wait until this point so the route can be initiated, if - * required - */ - if (!connector) { - xfer = OBJ_NEW(opal_buffer_t); - /* pack the request id */ - if (ORTE_SUCCESS != (rc = opal_dss.pack(xfer, &id, 1, OPAL_UINT32))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(xfer); - 
goto cleanup; - } - /* pack the remaining info */ - if (ORTE_SUCCESS != ompi_proc_pack(group->grp_proc_pointers, 1, true, xfer)) { - OBJ_RELEASE(xfer); - goto cleanup; - } - /* send to requestor */ - if (ORTE_SUCCESS != (rc = orte_rml.send_buffer_nb(sender, xfer, OMPI_RML_PCONNECT_TAG, - orte_rml_send_callback, NULL))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(xfer); - goto cleanup; - } - } - - /* allocate a new group */ - new_group_pointer=ompi_group_allocate(1); - if( NULL == new_group_pointer ) { - rc = OMPI_ERR_OUT_OF_RESOURCE; - goto cleanup; - } - - /* put group element in the list */ - new_group_pointer->grp_proc_pointers[0] = rprocs[0]; - - /* increment proc reference counter */ - ompi_group_increment_proc_count(new_group_pointer); - - OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_framework.framework_output, - "%s dpm:pconprocess setting up communicator", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - - /* set up communicator structure */ - rc = ompi_comm_set(&newcomp, /* new comm */ - MPI_COMM_SELF, /* old comm */ - 1, /* local_size */ - NULL, /* local_procs */ - 1, /* remote_size */ - NULL, /* remote_procs */ - NULL, /* attrs */ - MPI_COMM_SELF->error_handler, /* error handler */ - NULL, /* topo component */ - group, /* local group */ - new_group_pointer /* remote group */ - ); - if (NULL == newcomp) { - rc = OMPI_ERR_OUT_OF_RESOURCE; - goto cleanup; - } - - ompi_group_decrement_proc_count (new_group_pointer); - OBJ_RELEASE(new_group_pointer); - new_group_pointer = MPI_GROUP_NULL; - - /* return the communicator */ - *newcomm = newcomp; - *proct = rprocs[0]; - rc = OMPI_SUCCESS; - - cleanup: - if (NULL != rprocs) { - free(rprocs); - } - if (NULL != new_proc_list) { - free(new_proc_list); - } - if (OMPI_SUCCESS != rc && MPI_COMM_NULL != newcomp) { - OBJ_RELEASE(newcomp); - } -} - -static void connect_complete(int status, orte_process_name_t* sender, - opal_buffer_t* buffer, orte_rml_tag_t tag, - void* cbdata) -{ - ompi_communicator_t *newcomm=MPI_COMM_NULL; - ompi_proc_t 
*proct=NULL; - orte_dpm_prequest_t *req=NULL, *rptr; - int rc, cnt; - uint32_t id; - - OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_framework.framework_output, - "%s dpm:pconnect: starting", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - - /* unpack the request id */ - cnt=1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &id, &cnt, OPAL_UINT32))) { - ORTE_ERROR_LOG(rc); - goto cleanup; - } - - /* find this request on the list */ - req = NULL; - OPAL_THREAD_LOCK(&ompi_dpm_port_mutex); - OPAL_LIST_FOREACH(rptr, &orte_dpm_connectors, orte_dpm_prequest_t) { - if (id == rptr->id) { - req = rptr; - break; - } - } - if (NULL == req) { - /* unknown request */ - opal_output(0, "%s dpm:pconnect: received unknown id %u from %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), id, - ORTE_NAME_PRINT(sender)); - OPAL_THREAD_UNLOCK(&ompi_dpm_port_mutex); - return; - } - /* remove the request from the list */ - opal_list_remove_item(&orte_dpm_connectors, &req->super); - OPAL_THREAD_UNLOCK(&ompi_dpm_port_mutex); - - OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_framework.framework_output, - "%s dpm:pconnect: found request %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), id)); - - if (req->event_active) { - /* release the timeout */ - opal_event_del(&req->ev); - } - - /* process the request - as the initiator, we will send first - * for communicator creation - */ - process_request(sender, buffer, true, &newcomm, &proct); - /* notify the MPI layer */ - req->cbfunc(newcomm, proct, req->cbdata); - - cleanup: - if (NULL != req) { - OBJ_RELEASE(req); - } -} - -static int dpm_pconnect(char *port, - struct timeval *timeout, - ompi_dpm_base_paccept_connect_callback_fn_t cbfunc, - void *cbdata) -{ - char *hnp_uri, *rml_uri; - orte_rml_tag_t tag; - int rc; - orte_dpm_prequest_t *connector; - orte_process_name_t peer; - ompi_group_t *group=MPI_COMM_SELF->c_local_group; - opal_buffer_t *buf; - - /* separate the string into the HNP and RML URI and tag */ - if (ORTE_SUCCESS != (rc = parse_port_name(port, &hnp_uri, &rml_uri, 
&tag))) { - ORTE_ERROR_LOG(rc); - return rc; - } - /* extract the originating proc's name */ - if (ORTE_SUCCESS != (rc = orte_rml_base_parse_uris(rml_uri, &peer, NULL))) { - ORTE_ERROR_LOG(rc); - free(hnp_uri); free(rml_uri); - return rc; - } - /* make sure we can route rml messages to the destination job */ - if (ORTE_SUCCESS != (rc = route_to_port(hnp_uri, &peer))) { - ORTE_ERROR_LOG(rc); - free(hnp_uri); free(rml_uri); - return rc; - } - opal_output(0, "dpm:pconnect requesting connect to %s on tag %d", - ORTE_NAME_PRINT(&peer), tag); - - free(hnp_uri); free(rml_uri); - - /* create a message to the remote peer */ - buf = OBJ_NEW(opal_buffer_t); - - /* track the connection request */ - connector = OBJ_NEW(orte_dpm_prequest_t); - connector->tag = tag; - connector->cbfunc = cbfunc; - connector->cbdata = cbdata; - OPAL_THREAD_LOCK(&ompi_dpm_port_mutex); - connector->id = next_preq++; - opal_list_append(&orte_dpm_connectors, &connector->super); - OPAL_THREAD_UNLOCK(&ompi_dpm_port_mutex); - - /* pack my request id */ - if (ORTE_SUCCESS != (rc = opal_dss.pack(buf, &connector->id, 1, OPAL_UINT32))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buf); - OPAL_THREAD_LOCK(&ompi_dpm_port_mutex); - opal_list_remove_item(&orte_dpm_connectors, &connector->super); - OPAL_THREAD_UNLOCK(&ompi_dpm_port_mutex); - OBJ_RELEASE(connector); - return rc; - } - /* pack the request info */ - if (ORTE_SUCCESS != ompi_proc_pack(group->grp_proc_pointers, 1, true, buf)) { - OBJ_RELEASE(buf); - OPAL_THREAD_LOCK(&ompi_dpm_port_mutex); - opal_list_remove_item(&orte_dpm_connectors, &connector->super); - OPAL_THREAD_UNLOCK(&ompi_dpm_port_mutex); - OBJ_RELEASE(connector); - return rc; - } - - /* setup the timeout, if requested */ - if (NULL != timeout) { - opal_output(0, "dpm:pconnect setting timeout"); - opal_event_evtimer_set(orte_event_base, - &connector->ev, timeout_cb, connector); - opal_event_set_priority(&connector->ev, ORTE_ERROR_PRI); - opal_event_evtimer_add(&connector->ev, timeout); - 
connector->event_active = true; - } else { - connector->event_active = false; - } - - /* send it to our new friend */ - OPAL_OUTPUT_VERBOSE((2, ompi_dpm_base_framework.framework_output, - "%s dpm:pconnect sending connect to %s on tag %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&peer), tag)); - - if (ORTE_SUCCESS != (rc = orte_rml.send_buffer_nb(&peer, buf, tag, - orte_rml_send_callback, NULL))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buf); - } - - return rc; -} - -static void paccept_recv(int status, - orte_process_name_t* peer, - struct opal_buffer_t* buffer, - orte_rml_tag_t tag, - void* cbdata) -{ - orte_dpm_prequest_t *acceptor = (orte_dpm_prequest_t*)cbdata; - ompi_communicator_t *newcomm=MPI_COMM_NULL; - ompi_proc_t *proct=NULL; - - OPAL_OUTPUT_VERBOSE((2, ompi_dpm_base_framework.framework_output, - "%s dpm:paccept recvd request from %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(peer))); - - /* process the request - as the acceptor, we will recv first - * on communicator formation - */ - process_request(peer, buffer, false, &newcomm, &proct); - /* if we succeeded, notify the MPI layer */ - if (MPI_COMM_NULL != newcomm) { - acceptor->cbfunc(newcomm, proct, acceptor->cbdata); - } -} - -static int dpm_paccept(char *port, - ompi_dpm_base_paccept_connect_callback_fn_t cbfunc, - void *cbdata) -{ - orte_rml_tag_t tag; - int rc; - orte_dpm_prequest_t *acceptor; - - /* extract the RML tag from the port name - it's the only part we need */ - if (OMPI_SUCCESS != (rc = parse_port_name(port, NULL, NULL, &tag))) { - return rc; - } - - /* track the accept request */ - acceptor = OBJ_NEW(orte_dpm_prequest_t); - acceptor->tag = tag; - acceptor->cbfunc = cbfunc; - acceptor->cbdata = cbdata; - OPAL_THREAD_LOCK(&ompi_dpm_port_mutex); - opal_list_append(&orte_dpm_acceptors, &acceptor->super); - OPAL_THREAD_UNLOCK(&ompi_dpm_port_mutex); - - /* register a recv for this tag */ - orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, tag, - ORTE_RML_PERSISTENT, - 
paccept_recv, acceptor); - - return OMPI_SUCCESS; -} - -static void dpm_pclose(char *port) -{ - orte_rml_tag_t tag; - orte_dpm_prequest_t *rptr; - - /* extract the RML tag from the port name - it's the only part we need */ - if (OMPI_SUCCESS != parse_port_name(port, NULL, NULL, &tag)) { - return; - } - - OPAL_THREAD_LOCK(&ompi_dpm_port_mutex); - OPAL_LIST_FOREACH(rptr, &orte_dpm_acceptors, orte_dpm_prequest_t) { - if (tag == rptr->tag) { - /* found it */ - opal_list_remove_item(&orte_dpm_acceptors, &rptr->super); - orte_rml.recv_cancel(ORTE_NAME_WILDCARD, tag); - OBJ_RELEASE(rptr); - break; - } - } - OPAL_THREAD_UNLOCK(&ompi_dpm_port_mutex); -} diff --git a/ompi/mca/dpm/orte/dpm_orte.h b/ompi/mca/dpm/orte/dpm_orte.h deleted file mode 100644 index da6793068a..0000000000 --- a/ompi/mca/dpm/orte/dpm_orte.h +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University. - * All rights reserved. - * Copyright (c) 2004-2006 The Trustees of the University of Tennessee. - * All rights reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2007 Los Alamos National Security, LLC. All rights - * reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef OMPI_DPM_ORTE_H -#define OMPI_DPM_ORTE_H - -#include "ompi_config.h" - - -#include "ompi/mca/dpm/dpm.h" - -BEGIN_C_DECLS - -/* access to module */ -extern ompi_dpm_base_module_t ompi_dpm_orte_module; - -OMPI_MODULE_DECLSPEC extern ompi_dpm_base_component_t mca_dpm_orte_component; - -END_C_DECLS - -#endif /* OMPI_DPM_ORTE_H */ diff --git a/ompi/mca/dpm/orte/dpm_orte_component.c b/ompi/mca/dpm/orte/dpm_orte_component.c deleted file mode 100644 index 14af546622..0000000000 --- a/ompi/mca/dpm/orte/dpm_orte_component.c +++ /dev/null @@ -1,67 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2011 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2006 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" -#include "ompi/constants.h" - -#include "dpm_orte.h" - -static int dpm_orte_component_open(void); -static int dpm_orte_component_close(void); -static int dpm_orte_component_query(mca_base_module_t **module, int *priority); - -ompi_dpm_base_component_t mca_dpm_orte_component = { - /* First, the mca_base_component_t struct containing meta - information about the component itself */ - - .base_version = { - OMPI_DPM_BASE_VERSION_2_0_0, - - .mca_component_name = "orte", - MCA_BASE_MAKE_VERSION(component, OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION, - OMPI_RELEASE_VERSION), - .mca_open_component = dpm_orte_component_open, - .mca_close_component = dpm_orte_component_close, - .mca_query_component = dpm_orte_component_query, - }, - .base_data = { - /* This component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, -}; - - -int dpm_orte_component_open(void) -{ - return OMPI_SUCCESS; -} - -int dpm_orte_component_close(void) -{ - return OMPI_SUCCESS; -} - -static int dpm_orte_component_query(mca_base_module_t **module, int *priority) -{ - *priority = 50; - *module = (mca_base_module_t *) &ompi_dpm_orte_module; - return OMPI_SUCCESS; -} diff --git a/ompi/mca/dpm/orte/help-ompi-dpm-orte.txt b/ompi/mca/dpm/orte/help-ompi-dpm-orte.txt deleted file mode 100644 index 6780a31de7..0000000000 --- a/ompi/mca/dpm/orte/help-ompi-dpm-orte.txt +++ /dev/null @@ -1,43 +0,0 @@ -# -*- text -*- -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. 
-# All rights reserved. -# Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# -# This is the US/English general help file for Open MPI. -# -[dpm-orte:no-server] -Process rank %ld attempted to %s a global ompi_server that -could not be contacted. This is typically caused by either not -specifying the contact info for the server, or by the server not -currently executing. If you did specify the contact info for a -server, please check to see that the server is running and start -it again (or have your sys admin start it) if it isn't. - -[dpm-orte:unknown-order] -Process rank %ld attempted to lookup a value but provided an -unrecognized order parameter. Order parameters are used to tell Open -MPI if it should first look for the requested value locally (i.e., from -the current job) or from a global ompi_server. Accepted order -parameters are "local" and "global", respectively. - -[dpm-orte:too-many-orders] -Process rank %ld attempted to lookup a value but provided too many -order parameters (%ld found). Order parameters are used to tell -Open MPI if it should first look for the requested value locally -(i.e., from the current job) or from a global ompi_server. Accepted -order parameters are "local" and "global", respectively, and each can -only be specified once. diff --git a/ompi/mca/dpm/orte/owner.txt b/ompi/mca/dpm/orte/owner.txt deleted file mode 100644 index 4ad6f408ca..0000000000 --- a/ompi/mca/dpm/orte/owner.txt +++ /dev/null @@ -1,7 +0,0 @@ -# -# owner/status file -# owner: institution that is responsible for this package -# status: e.g. 
active, maintenance, unmaintained -# -owner: INTEL -status: maintenance diff --git a/ompi/mca/mtl/mxm/mtl_mxm.c b/ompi/mca/mtl/mxm/mtl_mxm.c index fc7155822c..2b6be1ac50 100644 --- a/ompi/mca/mtl/mxm/mtl_mxm.c +++ b/ompi/mca/mtl/mxm/mtl_mxm.c @@ -1,7 +1,7 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* * Copyright (C) 2001-2011 Mellanox Technologies Ltd. ALL RIGHTS RESERVED. - * Copyright (c) 2013-2014 Intel, Inc. All rights reserved + * Copyright (c) 2013-2015 Intel, Inc. All rights reserved * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights @@ -177,7 +177,7 @@ static int ompi_mtl_mxm_send_ep_address(void *address, size_t address_len) /* Send address length */ sprintf(modex_name, "%s-len", modex_component_name); - OPAL_MODEX_SEND_STRING(rc, PMIX_SYNC_REQD, PMIX_GLOBAL, + OPAL_MODEX_SEND_STRING(rc, OPAL_PMIX_GLOBAL, modex_name, &address_len, sizeof(address_len)); if (OMPI_SUCCESS != rc) { MXM_ERROR("failed to send address length"); @@ -192,7 +192,7 @@ static int ompi_mtl_mxm_send_ep_address(void *address, size_t address_len) while (modex_buf_size) { sprintf(modex_name, "%s-%d", modex_component_name, modex_name_id); modex_cur_size = (modex_buf_size < modex_max_size) ? 
modex_buf_size : modex_max_size; - OPAL_MODEX_SEND_STRING(rc, PMIX_SYNC_REQD, PMIX_GLOBAL, + OPAL_MODEX_SEND_STRING(rc, OPAL_PMIX_GLOBAL, modex_name, modex_buf_ptr, modex_cur_size); if (OMPI_SUCCESS != rc) { MXM_ERROR("Open MPI couldn't distribute EP connection details"); @@ -232,7 +232,7 @@ static int ompi_mtl_mxm_recv_ep_address(ompi_proc_t *source_proc, void **address /* Receive address length */ sprintf(modex_name, "%s-len", modex_component_name); - OPAL_MODEX_RECV_STRING(rc, modex_name, &source_proc->super, + OPAL_MODEX_RECV_STRING(rc, modex_name, &source_proc->super.proc_name, (char**)&address_len_buf_ptr, &modex_cur_size); if (OMPI_SUCCESS != rc) { @@ -253,7 +253,7 @@ static int ompi_mtl_mxm_recv_ep_address(ompi_proc_t *source_proc, void **address modex_buf_size = 0; while (modex_buf_size < *address_len_p) { sprintf(modex_name, "%s-%d", modex_component_name, modex_name_id); - OPAL_MODEX_RECV_STRING(rc, modex_name, &source_proc->super, + OPAL_MODEX_RECV_STRING(rc, modex_name, &source_proc->super.proc_name, (char**)&modex_buf_ptr, &modex_cur_size); if (OMPI_SUCCESS != rc) { diff --git a/ompi/mca/mtl/ofi/mtl_ofi_compat.h b/ompi/mca/mtl/ofi/mtl_ofi_compat.h index f8fe3a7b89..1a87841843 100644 --- a/ompi/mca/mtl/ofi/mtl_ofi_compat.h +++ b/ompi/mca/mtl/ofi/mtl_ofi_compat.h @@ -21,14 +21,14 @@ #if (OPAL_MAJOR_VERSION >= 2) #include "opal/mca/pmix/pmix.h" +#include "opal/mca/pmix/pmix_types.h" #define OFI_COMPAT_MODEX_RECV(ret, mtl_version, proc, ep_name, size) \ - OPAL_MODEX_RECV((ret), (mtl_version), &(proc)->super, (ep_name), (size)); + OPAL_MODEX_RECV((ret), (mtl_version), &(proc)->super.proc_name, (ep_name), (size)); #define OFI_COMPAT_MODEX_SEND(ret, mtl_version, ep_name, namelen) \ OPAL_MODEX_SEND((ret), \ - PMIX_SYNC_REQD, \ - PMIX_GLOBAL, \ + OPAL_PMIX_GLOBAL, \ (mtl_version), \ (ep_name)[0], \ (namelen)); diff --git a/ompi/mca/mtl/portals4/mtl_portals4.c b/ompi/mca/mtl/portals4/mtl_portals4.c index 70673a2acc..e80e3fe2c2 100644 --- 
a/ompi/mca/mtl/portals4/mtl_portals4.c +++ b/ompi/mca/mtl/portals4/mtl_portals4.c @@ -281,7 +281,7 @@ ompi_mtl_portals4_add_procs(struct mca_mtl_base_module_t *mtl, } OPAL_MODEX_RECV(ret, &mca_mtl_portals4_component.mtl_version, - &procs[i]->super, (uint8_t**)&modex_id, &size); + &procs[i]->super.proc_name, (uint8_t**)&modex_id, &size); if (OMPI_SUCCESS != ret) { opal_output_verbose(1, ompi_mtl_base_framework.framework_output, "%s:%d: ompi_modex_recv failed: %d\n", diff --git a/ompi/mca/mtl/portals4/mtl_portals4_component.c b/ompi/mca/mtl/portals4/mtl_portals4_component.c index f9ba68cc06..0906d637c3 100644 --- a/ompi/mca/mtl/portals4/mtl_portals4_component.c +++ b/ompi/mca/mtl/portals4/mtl_portals4_component.c @@ -313,7 +313,7 @@ ompi_mtl_portals4_component_init(bool enable_progress_threads, goto error; } - OPAL_MODEX_SEND(ret, PMIX_SYNC_REQD, PMIX_GLOBAL, + OPAL_MODEX_SEND(ret, OPAL_PMIX_GLOBAL, &mca_mtl_portals4_component.mtl_version, &id, sizeof(id)); if (OMPI_SUCCESS != ret) { diff --git a/ompi/mca/mtl/psm/mtl_psm.c b/ompi/mca/mtl/psm/mtl_psm.c index 6adcfabed9..856589523a 100644 --- a/ompi/mca/mtl/psm/mtl_psm.c +++ b/ompi/mca/mtl/psm/mtl_psm.c @@ -174,7 +174,7 @@ int ompi_mtl_psm_module_init(int local_rank, int num_local_procs) { ompi_mtl_psm.epid = epid; ompi_mtl_psm.mq = mq; - OPAL_MODEX_SEND(rc, PMIX_SYNC_REQD, PMIX_GLOBAL, + OPAL_MODEX_SEND(rc, OPAL_PMIX_GLOBAL, &mca_mtl_psm_component.super.mtl_version, &ompi_mtl_psm.epid, sizeof(psm_epid_t)); @@ -296,7 +296,7 @@ ompi_mtl_psm_add_procs(struct mca_mtl_base_module_t *mtl, } OPAL_MODEX_RECV(rc, &mca_mtl_psm_component.super.mtl_version, - &procs[i]->super, (void**)&epid, &size); + &procs[i]->super.proc_name, (void**)&epid, &size); if (rc != OMPI_SUCCESS || size != sizeof(psm_epid_t)) { rc = OMPI_ERROR; goto bail; diff --git a/ompi/mca/mtl/psm2/mtl_psm2.c b/ompi/mca/mtl/psm2/mtl_psm2.c index 4b95ce44a0..34fe8ae892 100644 --- a/ompi/mca/mtl/psm2/mtl_psm2.c +++ b/ompi/mca/mtl/psm2/mtl_psm2.c @@ -157,7 +157,7 @@ 
int ompi_mtl_psm2_module_init(int local_rank, int num_local_procs) { ompi_mtl_psm2.epid = epid; ompi_mtl_psm2.mq = mq; - OPAL_MODEX_SEND(rc, PMIX_SYNC_REQD, PMIX_GLOBAL, + OPAL_MODEX_SEND(rc, OPAL_PMIX_GLOBAL, &mca_mtl_psm2_component.super.mtl_version, &ompi_mtl_psm2.epid, sizeof(psm_epid_t)); @@ -280,7 +280,7 @@ ompi_mtl_psm2_add_procs(struct mca_mtl_base_module_t *mtl, } OPAL_MODEX_RECV(rc, &mca_mtl_psm2_component.super.mtl_version, - &procs[i]->super, (void**)&epid, &size); + &procs[i]->super.proc_name, (void**)&epid, &size); if (rc != OMPI_SUCCESS || size != sizeof(psm_epid_t)) { return OMPI_ERROR; } diff --git a/ompi/mca/pml/base/pml_base_select.c b/ompi/mca/pml/base/pml_base_select.c index b046cf4222..258e105a84 100644 --- a/ompi/mca/pml/base/pml_base_select.c +++ b/ompi/mca/pml/base/pml_base_select.c @@ -312,7 +312,7 @@ mca_pml_base_pml_selected(const char *name) { int rc; - OPAL_MODEX_SEND(rc, PMIX_SYNC_REQD, PMIX_GLOBAL, &pml_base_component, name, strlen(name) + 1); + OPAL_MODEX_SEND(rc, OPAL_PMIX_GLOBAL, &pml_base_component, name, strlen(name) + 1); return rc; } @@ -343,7 +343,7 @@ mca_pml_base_pml_check_selected(const char *my_pml, /* get the name of the PML module selected by rank=0 */ OPAL_MODEX_RECV(ret, &pml_base_component, - &procs[0]->super, (void**) &remote_pml, &size); + &procs[0]->super.proc_name, (void**) &remote_pml, &size); /* if this key wasn't found, then just assume all is well... 
*/ if (OMPI_SUCCESS != ret) { diff --git a/ompi/mca/pml/yalla/pml_yalla.c b/ompi/mca/pml/yalla/pml_yalla.c index d9d7984389..4200aef383 100644 --- a/ompi/mca/pml/yalla/pml_yalla.c +++ b/ompi/mca/pml/yalla/pml_yalla.c @@ -68,7 +68,7 @@ static int send_ep_address(void) return OMPI_ERROR; } - OPAL_MODEX_SEND(rc, PMIX_SYNC_REQD, PMIX_GLOBAL, + OPAL_MODEX_SEND(rc, OPAL_PMIX_GLOBAL, &mca_pml_yalla_component.pmlm_version, address, addrlen); if (OMPI_SUCCESS != rc) { PML_YALLA_ERROR("Open MPI couldn't distribute EP connection details"); @@ -82,7 +82,7 @@ static int recv_ep_address(ompi_proc_t *proc, void **address_p, size_t *addrlen_ { int rc; - OPAL_MODEX_RECV(rc, &mca_pml_yalla_component.pmlm_version, &proc->super, + OPAL_MODEX_RECV(rc, &mca_pml_yalla_component.pmlm_version, &proc->super.proc_name, address_p, addrlen_p); if (rc < 0) { PML_YALLA_ERROR("Failed to receive EP address"); diff --git a/ompi/mca/pubsub/Makefile.am b/ompi/mca/pubsub/Makefile.am deleted file mode 100644 index a595cf971c..0000000000 --- a/ompi/mca/pubsub/Makefile.am +++ /dev/null @@ -1,37 +0,0 @@ -# -# Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. 
-# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# main library setup -noinst_LTLIBRARIES = libmca_pubsub.la -libmca_pubsub_la_SOURCES = - -# local files -headers = pubsub.h -libmca_pubsub_la_SOURCES += $(headers) - -# Conditionally install the header files -if WANT_INSTALL_HEADERS -ompidir = $(ompiincludedir)/$(subdir) -nobase_ompi_HEADERS = $(headers) -endif - -include base/Makefile.am - -distclean-local: - rm -f base/static-components.h diff --git a/ompi/mca/pubsub/base/base.h b/ompi/mca/pubsub/base/base.h deleted file mode 100644 index 0644197f36..0000000000 --- a/ompi/mca/pubsub/base/base.h +++ /dev/null @@ -1,54 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2012-2013 Los Alamos National Security, LLC. - * All rights reserved - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -#ifndef OMPI_MCA_PUBSUB_BASE_H -#define OMPI_MCA_PUBSUB_BASE_H - -#include "ompi_config.h" -#include "ompi/constants.h" - -#include "ompi/mca/pubsub/pubsub.h" - -/* - * Global functions for MCA overall PUBSUB - */ - -BEGIN_C_DECLS - -/* - * MCA framework - */ -OMPI_DECLSPEC extern mca_base_framework_t ompi_pubsub_base_framework; -/* - * Select an available component. 
- */ -OMPI_DECLSPEC int ompi_pubsub_base_select(void); - -/* NULL functions */ -OMPI_DECLSPEC int ompi_pubsub_base_null_publish(const char *service, ompi_info_t *info, const char *port); -OMPI_DECLSPEC int ompi_pubsub_base_null_unpublish(const char *service, ompi_info_t *info); -OMPI_DECLSPEC char* ompi_pubsub_base_null_lookup(const char *service, ompi_info_t *info); - -/* useful globals */ -OMPI_DECLSPEC extern ompi_pubsub_base_module_t ompi_pubsub; - -END_C_DECLS - -#endif /* OMPI_MCA_PUBSUB_BASE_H */ diff --git a/ompi/mca/pubsub/base/owner.txt b/ompi/mca/pubsub/base/owner.txt deleted file mode 100644 index 4ad6f408ca..0000000000 --- a/ompi/mca/pubsub/base/owner.txt +++ /dev/null @@ -1,7 +0,0 @@ -# -# owner/status file -# owner: institution that is responsible for this package -# status: e.g. active, maintenance, unmaintained -# -owner: INTEL -status: maintenance diff --git a/ompi/mca/pubsub/base/pubsub_base_frame.c b/ompi/mca/pubsub/base/pubsub_base_frame.c deleted file mode 100644 index d1c98dab27..0000000000 --- a/ompi/mca/pubsub/base/pubsub_base_frame.c +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright (c) 2004-2007 The Trustees of Indiana University. - * All rights reserved. - * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. - * All rights reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2012-2013 Los Alamos National Security, LLC. 
- * All rights reserved - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "ompi/mca/mca.h" -#include "opal/util/output.h" -#include "opal/mca/base/base.h" - - -#include "ompi/mca/pubsub/pubsub.h" -#include "ompi/mca/pubsub/base/base.h" - -#include "ompi/mca/pubsub/base/static-components.h" - -/* - * Globals - */ -OMPI_DECLSPEC ompi_pubsub_base_module_t ompi_pubsub={ - NULL, - ompi_pubsub_base_null_publish, - ompi_pubsub_base_null_unpublish, - ompi_pubsub_base_null_lookup, - NULL -}; - -static int ompi_pubsub_base_close(void) -{ - /* Close the selected component */ - if( NULL != ompi_pubsub.finalize ) { - ompi_pubsub.finalize(); - } - - return mca_base_framework_components_close(&ompi_pubsub_base_framework, NULL); -} - -/** - * Function for finding and opening either all MCA components, - * or the one that was specifically requested via a MCA parameter. - */ -static int ompi_pubsub_base_open(mca_base_open_flag_t flags) -{ - /* Open up all available components */ - return mca_base_framework_components_open(&ompi_pubsub_base_framework, flags); -} - -MCA_BASE_FRAMEWORK_DECLARE(ompi, pubsub, "OMPI Publish-Subscribe Subsystem", NULL, - ompi_pubsub_base_open, ompi_pubsub_base_close, - mca_pubsub_base_static_components, 0); diff --git a/ompi/mca/pubsub/base/pubsub_base_null_fns.c b/ompi/mca/pubsub/base/pubsub_base_null_fns.c deleted file mode 100644 index 38df9cab62..0000000000 --- a/ompi/mca/pubsub/base/pubsub_base_null_fns.c +++ /dev/null @@ -1,38 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2007 The Trustees of Indiana University. - * All rights reserved. - * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. - * All rights reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. 
- * All rights reserved. - * Copyright (c) 2013 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "ompi/mca/pubsub/pubsub.h" -#include "ompi/mca/pubsub/base/base.h" - -int ompi_pubsub_base_null_publish(const char *service, ompi_info_t *info, const char *port) -{ - return OMPI_ERR_NOT_SUPPORTED; -} - -int ompi_pubsub_base_null_unpublish(const char *service, ompi_info_t *info) -{ - return OMPI_ERR_NOT_SUPPORTED; -} - -char* ompi_pubsub_base_null_lookup(const char *service, ompi_info_t *info) -{ - return NULL; -} diff --git a/ompi/mca/pubsub/base/pubsub_base_select.c b/ompi/mca/pubsub/base/pubsub_base_select.c deleted file mode 100644 index c16a262313..0000000000 --- a/ompi/mca/pubsub/base/pubsub_base_select.c +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Copyright (c) 2004-2008 The Trustees of Indiana University. - * All rights reserved. - * Copyright (c) 2004-2005 The Trustees of the University of Tennessee. - * All rights reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights - * reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "ompi/mca/mca.h" -#include "opal/mca/base/base.h" - -#include "opal/mca/base/mca_base_component_repository.h" - -#include "ompi/mca/pubsub/pubsub.h" -#include "ompi/mca/pubsub/base/base.h" - - -int ompi_pubsub_base_select(void) -{ - int ret; - ompi_pubsub_base_component_t *best_component = NULL; - ompi_pubsub_base_module_t *best_module = NULL; - - /* - * Select the best component - */ - if( OPAL_SUCCESS != (ret = mca_base_select("pubsub", ompi_pubsub_base_framework.framework_output, - &ompi_pubsub_base_framework.framework_components, - (mca_base_module_t **) &best_module, - (mca_base_component_t **) &best_component))) { - /* it is okay not to find any executable components */ - if (OMPI_ERR_NOT_FOUND == ret) { - ret = OPAL_SUCCESS; - } - goto cleanup; - } - - /* Save the winner */ - ompi_pubsub = *best_module; - - /* init the selected module */ - if (NULL != ompi_pubsub.init) { - ret = ompi_pubsub.init(); - } - - cleanup: - return ret; -} diff --git a/ompi/mca/pubsub/orte/Makefile.am b/ompi/mca/pubsub/orte/Makefile.am deleted file mode 100644 index 9336c20e58..0000000000 --- a/ompi/mca/pubsub/orte/Makefile.am +++ /dev/null @@ -1,42 +0,0 @@ -# -# Copyright (c) 2004-2006 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2009 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -dist_ompidata_DATA = help-ompi-pubsub-orte.txt - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). 
- -if MCA_BUILD_ompi_pubsub_orte_DSO -component_noinst = -component_install = mca_pubsub_orte.la -else -component_noinst = libmca_pubsub_orte.la -component_install = -endif - -local_sources = \ - pubsub_orte.c \ - pubsub_orte.h \ - pubsub_orte_component.c - -mcacomponentdir = $(ompilibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_pubsub_orte_la_SOURCES = $(local_sources) -mca_pubsub_orte_la_LDFLAGS = -module -avoid-version $(pubsub_orte_LDFLAGS) - -noinst_LTLIBRARIES = $(component_noinst) -libmca_pubsub_orte_la_SOURCES = $(local_sources) -libmca_pubsub_orte_la_LIBADD = $(pubsub_orte_LIBS) -libmca_pubsub_orte_la_LDFLAGS = -module -avoid-version $(pubsub_orte_LDFLAGS) - diff --git a/ompi/mca/pubsub/orte/configure.m4 b/ompi/mca/pubsub/orte/configure.m4 deleted file mode 100644 index 5f29d00a0c..0000000000 --- a/ompi/mca/pubsub/orte/configure.m4 +++ /dev/null @@ -1,24 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2011 Los Alamos National Security, LLC. -# All rights reserved. -# -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# MCA_pubsub_orte_CONFIG([action-if-found], [action-if-not-found]) -# ----------------------------------------------------------- -AC_DEFUN([MCA_ompi_pubsub_orte_CONFIG],[ - AC_CONFIG_FILES([ompi/mca/pubsub/orte/Makefile]) - - AC_ARG_WITH([orte], - AC_HELP_STRING([--with-orte], - [Use ORTE run-time environment (default: yes)])) - AS_IF([test "$with_orte" != "no"], - [$1], - [$2]) -]) diff --git a/ompi/mca/pubsub/orte/help-ompi-pubsub-orte.txt b/ompi/mca/pubsub/orte/help-ompi-pubsub-orte.txt deleted file mode 100644 index 6f99893c4a..0000000000 --- a/ompi/mca/pubsub/orte/help-ompi-pubsub-orte.txt +++ /dev/null @@ -1,43 +0,0 @@ -# -*- text -*- -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. 
All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# -# This is the US/English general help file for Open MPI. -# -[pubsub-orte:no-server] -Process rank %ld attempted to %s a global ompi_server that -could not be contacted. This is typically caused by either not -specifying the contact info for the server, or by the server not -currently executing. If you did specify the contact info for a -server, please check to see that the server is running and start -it again (or have your sys admin start it) if it isn't. - -[pubsub-orte:unknown-order] -Process rank %ld attempted to lookup a value but provided an -unrecognized order parameter. Order parameters are used to tell Open -MPI if it should first look for the requested value locally (i.e., from -the current job) or from a global ompi_server. Accepted order -parameters are "local" and "global", respectively. - -[pubsub-orte:too-many-orders] -Process rank %ld attempted to lookup a value but provided too many -order parameters (%ld found). Order parameters are used to tell -Open MPI if it should first look for the requested value locally -(i.e., from the current job) or from a global ompi_server. Accepted -order parameters are "local" and "global", respectively, and each can -only be specified once. diff --git a/ompi/mca/pubsub/orte/owner.txt b/ompi/mca/pubsub/orte/owner.txt deleted file mode 100644 index 4ad6f408ca..0000000000 --- a/ompi/mca/pubsub/orte/owner.txt +++ /dev/null @@ -1,7 +0,0 @@ -# -# owner/status file -# owner: institution that is responsible for this package -# status: e.g. 
active, maintenance, unmaintained -# -owner: INTEL -status: maintenance diff --git a/ompi/mca/pubsub/orte/pubsub_orte.c b/ompi/mca/pubsub/orte/pubsub_orte.c deleted file mode 100644 index 67bbf001c3..0000000000 --- a/ompi/mca/pubsub/orte/pubsub_orte.c +++ /dev/null @@ -1,582 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2011 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2013 Intel, Inc. All rights reserved. - * Copyright (c) 2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" -#include "ompi/constants.h" - -#include -#include -#include - -#include "opal/util/show_help.h" -#include "opal/util/argv.h" -#include "opal/dss/dss.h" - -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/rml/rml.h" -#include "orte/mca/rml/rml_types.h" -#include "orte/mca/rml/base/rml_contact.h" -#include "orte/mca/routed/routed.h" -#include "orte/util/name_fns.h" -#include "orte/runtime/orte_globals.h" -#include "orte/runtime/orte_data_server.h" - -#include "ompi/info/info.h" -#include "ompi/mca/rte/rte.h" - -#include "ompi/mca/pubsub/base/base.h" -#include "pubsub_orte.h" - -/* Establish contact with the server - * - * NOTE: we do not do this automatically during init to avoid - * forcing every process to pay the time penalty during MPI_Init - * when only a few, if any, will ever call pub/lookup/unpub. In - * addition, those that -do- call these functions may well only - * use local (as opposed to global) storage, and hence will have - * no need to talk to the server, even though a sys admin may - * have set one up. So we do a lazy setup of the server contact - * info - it only gets setup the first time we call a function - * that wants to talk to the global server - */ -static bool server_setup=false; - -static void setup_server(void) -{ - opal_buffer_t buf; - int rc; - - OPAL_OUTPUT_VERBOSE((1, ompi_pubsub_base_framework.framework_output, - "%s pubsub:orte: setting up server at URI %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - (NULL == mca_pubsub_orte_component.server_uri) ? 
"NULL" : mca_pubsub_orte_component.server_uri)); - - /* flag setup as completed so we only pass through here once */ - server_setup = true; - - if (NULL == mca_pubsub_orte_component.server_uri) { - /* if the contact info for the server is NULL, then there - * is nothing we can do - there is no path to the server - */ - mca_pubsub_orte_component.server_found = false; - return; - } - - /* init the route to the server - init_routes wants a buffer - * passed to it, so we have to package the server's contact - * info into a buffer - */ - OBJ_CONSTRUCT(&buf, opal_buffer_t); - opal_dss.pack(&buf, &mca_pubsub_orte_component.server_uri, 1, OPAL_STRING); - /* extract the server's name so we have its jobid */ - if (ORTE_SUCCESS != (rc = orte_rml_base_parse_uris(mca_pubsub_orte_component.server_uri, - &mca_pubsub_orte_component.server, NULL))) { - ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(&buf); - mca_pubsub_orte_component.server_found = false; - return; - } - /* init routes to the server's job */ - if (ORTE_SUCCESS != (rc = orte_routed.init_routes(mca_pubsub_orte_component.server.jobid, &buf))) { - ORTE_ERROR_LOG(rc); - mca_pubsub_orte_component.server_found = false; - OBJ_DESTRUCT(&buf); - return; - } - OBJ_DESTRUCT(&buf); - - /* flag the server as found */ - mca_pubsub_orte_component.server_found = true; - - OPAL_OUTPUT_VERBOSE((1, ompi_pubsub_base_framework.framework_output, - "%s pubsub:orte: server %s setup", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(&mca_pubsub_orte_component.server))); -} - -/* - * Init the module - */ -static int init(void) -{ - return OMPI_SUCCESS; -} - -/* - * publish the port_name for the specified service_name. This will - * be published under our process name, so only we will be allowed - * to remove it later. 
- */ -static int publish ( const char *service_name, ompi_info_t *info, const char *port_name ) -{ - int rc, ret, flag; - bool global_scope = false; - orte_process_name_t *info_host; - opal_buffer_t *buf; - orte_data_server_cmd_t cmd=ORTE_DATA_SERVER_PUBLISH; - orte_std_cntr_t cnt; - orte_rml_recv_cb_t xfer; - bool unique=false; - - ompi_info_get_bool(info, "ompi_global_scope", &global_scope, &flag); - - if (0 == flag) { - /* scope was not defined - see if server exists */ - if (!server_setup) { - setup_server(); - } - if (mca_pubsub_orte_component.server_found) { - /* server was found - use it as our default store */ - info_host = &mca_pubsub_orte_component.server; - global_scope = true; - } else { - /* server was not found - use our HNP as default store */ - info_host = ORTE_PROC_MY_HNP; - } - } else if (!global_scope) { - /* if the scope is not global, then store the value on the HNP */ - info_host = ORTE_PROC_MY_HNP; - } else { - /* has the server been setup yet? */ - if (!server_setup) { - setup_server(); - } - /* store the value on the global ompi_server, but error - * if that server wasn't contacted - */ - if (!mca_pubsub_orte_component.server_found) { - opal_show_help("help-ompi-pubsub-orte.txt", "pubsub-orte:no-server", - true, (long)ORTE_PROC_MY_NAME->vpid, "publish to"); - return OMPI_ERR_NOT_FOUND; - } - info_host = &mca_pubsub_orte_component.server; - } - - OPAL_OUTPUT_VERBOSE((1, ompi_pubsub_base_framework.framework_output, - "%s pubsub:orte: publishing service %s scope %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - service_name, global_scope ? 
"Global" : "Local")); - - ompi_info_get_bool(info, "ompi_unique", &unique, &flag); - if (0 == flag) { - /* uniqueness not specified - overwrite by default */ - unique = false; - } - - /* construct the buffer */ - buf = OBJ_NEW(opal_buffer_t); - - /* pack the publish command */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(buf, &cmd, 1, ORTE_DATA_SERVER_CMD))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buf); - goto CLEANUP; - } - - /* pack the service name */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(buf, &service_name, 1, OPAL_STRING))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buf); - goto CLEANUP; - } - - /* pack the port name */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(buf, &port_name, 1, OPAL_STRING))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buf); - goto CLEANUP; - } - - /* pack the uniqueness flag */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(buf, &unique, 1, OPAL_BOOL))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buf); - goto CLEANUP; - } - - /* send the data */ - if (0 > (rc = orte_rml.send_buffer_nb(info_host, buf, - ORTE_RML_TAG_DATA_SERVER, - orte_rml_send_callback, NULL))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buf); - goto CLEANUP; - } - - /* get the answer */ - OBJ_CONSTRUCT(&xfer, orte_rml_recv_cb_t); - xfer.active = true; - orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, - ORTE_RML_TAG_DATA_CLIENT, - ORTE_RML_NON_PERSISTENT, - orte_rml_recv_callback, &xfer); - OMPI_WAIT_FOR_COMPLETION(xfer.active); - - /* unpack the result */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(&xfer.data, &ret, &cnt, OPAL_INT))) { - ORTE_ERROR_LOG(rc); - } - rc = ret; - OBJ_DESTRUCT(&xfer); - -CLEANUP: - return rc; -} - -enum { NONE, LOCAL, GLOBAL }; - -static char* lookup ( const char *service_name, ompi_info_t *info ) -{ - orte_process_name_t *info_host; - opal_buffer_t *buf; - orte_data_server_cmd_t cmd=ORTE_DATA_SERVER_LOOKUP; - orte_std_cntr_t cnt=0; - char *port_name=NULL; - int ret, rc, flag, i; - char value[256], **tokens, *ptr; - int lookup[2] = { GLOBAL, LOCAL }; - size_t 
num_tokens; - orte_rml_recv_cb_t xfer; - - /* Look in the MPI_Info (ompi_info_t*) for the key - * "ompi_lookup_order". Acceptable values are: - * - * - "local" -- only check the local scope - * - "global" -- only check the global scope - * - "local,global" -- check the local scope first, then check the - * global scope - * - "global,local" -- check the global scope first, then check the - * local scope - * - * Give a little leeway in terms of whitespace in the value. - * - * The lookup[2] array will contain the results: lookup[0] is the - * first scope to check, lookup[1] is the 2nd. Either value may - * be NONE, LOCAL, or GLOBAL. If both are NONE, clearly that's an - * error. :-) - */ - ompi_info_get(info, "ompi_lookup_order", sizeof(value) - 1, value, &flag); - if (flag) { - ptr = &value[0]; - while (isspace(*ptr) && (ptr - value) < (int)sizeof(value)) { - ++ptr; - } - if (ptr - value < (int)sizeof(value)) { - tokens = opal_argv_split(ptr, ','); - if (NULL != tokens) { - if ((num_tokens = opal_argv_count(tokens)) > 2) { - /* too many values in the comma-delimited list */ - opal_show_help("help-ompi-pubsub-orte.txt", - "pubsub-orte:too-many-orders", - true, (long)ORTE_PROC_MY_NAME->vpid, - (long)num_tokens); - opal_argv_free(tokens); - return NULL; - } - for (i = 0; i < 2; ++i) { - if (NULL != tokens[i]) { - if (0 == strcasecmp(tokens[i], "local")) { - lookup[i] = LOCAL; - } else if (0 == strcasecmp(tokens[i], "global")) { - lookup[i] = GLOBAL; - } else { - /* unrecognized value -- that's an error */ - opal_show_help("help-ompi-pubsub-orte.txt", - "pubsub-orte:unknown-order", - true, (long)ORTE_PROC_MY_NAME->vpid); - opal_argv_free(tokens); - return NULL; - } - } else { - lookup[i] = NONE; - } - } - opal_argv_free(tokens); - } - } - - if (NONE == lookup[0]) { - /* if the user provided an info key, then we at least must - * be given one place to look - */ - opal_show_help("help-ompi-pubsub-orte.txt", - "pubsub-orte:unknown-order", - true, 
(long)ORTE_PROC_MY_NAME->vpid); - return NULL; - } - - } else { - /* if no info key was provided, then we default to the global - * server IF it is active - */ - if (!server_setup) { - setup_server(); - } - lookup[1] = NONE; - if (mca_pubsub_orte_component.server_found) { - lookup[0] = GLOBAL; - } else { - /* global server was not found - just look local */ - lookup[0] = LOCAL; - } - } - - OPAL_OUTPUT_VERBOSE((1, ompi_pubsub_base_framework.framework_output, - "%s pubsub:orte: lookup service %s scope %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - service_name, lookup[0])); - - /* go find the value */ - for (i=0; i < 2; i++) { - if (LOCAL == lookup[i]) { - /* if the scope is local, then lookup the value on the HNP */ - info_host = ORTE_PROC_MY_HNP; - } else if (GLOBAL == lookup[i]) { - /* has the server been setup yet? */ - if (!server_setup) { - setup_server(); - } - /* lookup the value on the global ompi_server, but error - * if that server wasn't contacted - */ - if (!mca_pubsub_orte_component.server_found) { - opal_show_help("help-ompi-pubsub-orte.txt", - "pubsub-orte:no-server", - true, (long)ORTE_PROC_MY_NAME->vpid, - "lookup from"); - return NULL; - } - info_host = &mca_pubsub_orte_component.server; - } else if (NONE == lookup[i]) { - continue; - } else { - /* unknown host! 
*/ - opal_show_help("help-ompi-pubsub-orte.txt", - "pubsub-orte:unknown-order", - true, (long)ORTE_PROC_MY_NAME->vpid); - return NULL; - } - - /* go look it up */ - /* construct the buffer */ - buf = OBJ_NEW(opal_buffer_t); - - /* pack the lookup command */ - if (OPAL_SUCCESS != (ret = opal_dss.pack(buf, &cmd, 1, ORTE_DATA_SERVER_CMD))) { - ORTE_ERROR_LOG(ret); - OBJ_RELEASE(buf); - goto CLEANUP; - } - - /* pack the service name */ - if (OPAL_SUCCESS != (ret = opal_dss.pack(buf, &service_name, 1, OPAL_STRING))) { - ORTE_ERROR_LOG(ret); - OBJ_RELEASE(buf); - goto CLEANUP; - } - - /* send the cmd */ - if (0 > (ret = orte_rml.send_buffer_nb(info_host, buf, - ORTE_RML_TAG_DATA_SERVER, - orte_rml_send_callback, NULL))) { - ORTE_ERROR_LOG(ret); - OBJ_RELEASE(buf); - goto CLEANUP; - } - - /* get the answer */ - OBJ_CONSTRUCT(&xfer, orte_rml_recv_cb_t); - xfer.active = true; - orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, - ORTE_RML_TAG_DATA_CLIENT, - ORTE_RML_NON_PERSISTENT, - orte_rml_recv_callback, &xfer); - OMPI_WAIT_FOR_COMPLETION(xfer.active); - - /* unpack the return code */ - cnt = 1; - if (OPAL_SUCCESS != (ret = opal_dss.unpack(&xfer.data, &rc, &cnt, OPAL_INT))) { - ORTE_ERROR_LOG(ret); - goto CLEANUP; - } - - OPAL_OUTPUT_VERBOSE((1, ompi_pubsub_base_framework.framework_output, - "%s pubsub:orte: lookup returned status %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), rc)); - - if (ORTE_SUCCESS == rc) { - /* the server was able to lookup the port - unpack the port name */ - cnt=1; - if (OPAL_SUCCESS != (ret = opal_dss.unpack(&xfer.data, &port_name, &cnt, OPAL_STRING))) { - ORTE_ERROR_LOG(ret); - OBJ_DESTRUCT(&xfer); - goto CLEANUP; - } - - OPAL_OUTPUT_VERBOSE((1, ompi_pubsub_base_framework.framework_output, - "%s pubsub:orte: lookup returned port %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - (NULL == port_name) ? 
"NULL" : port_name)); - - if (NULL != port_name) { - /* got an answer - return it */ - OBJ_DESTRUCT(&xfer); - return port_name; - } - } - - /* if we didn't get a port_name, then continue */ - OBJ_DESTRUCT(&xfer); - } - - /* only get here if we tried both options and failed - since the - * buffer will already have been cleaned up, just return - */ - CLEANUP: - return NULL; -} - -/* - * delete the entry. Only the process who has published - * the service_name has the right to remove this - * service - the server will verify and report the result - */ -static int unpublish ( const char *service_name, ompi_info_t *info ) -{ - int rc, ret, flag; - bool global_scope; - orte_process_name_t *info_host; - opal_buffer_t *buf; - orte_data_server_cmd_t cmd=ORTE_DATA_SERVER_UNPUBLISH; - orte_std_cntr_t cnt; - orte_rml_recv_cb_t xfer; - - ompi_info_get_bool(info, "ompi_global_scope", &global_scope, &flag); - - if (0 == flag) { - /* scope was not defined - see if server exists */ - if (!server_setup) { - setup_server(); - } - if (mca_pubsub_orte_component.server_found) { - /* server was found - use it as our default store */ - info_host = &mca_pubsub_orte_component.server; - global_scope = true; - } else { - /* server was not found - use our HNP as default store */ - info_host = ORTE_PROC_MY_HNP; - } - } else if (!global_scope) { - /* if the scope is not global, then unpublish the value from the HNP */ - info_host = ORTE_PROC_MY_HNP; - } else { - /* has the server been setup yet? 
*/ - if (!server_setup) { - setup_server(); - } - /* unpublish the value from the global ompi_server, but error - * if that server wasn't contacted - */ - if (!mca_pubsub_orte_component.server_found) { - opal_show_help("help-ompi-pubsub-orte.txt", "pubsub-orte:no-server", - true, (long)ORTE_PROC_MY_NAME->vpid, "unpublish from"); - return OMPI_ERR_NOT_FOUND; - } - info_host = &mca_pubsub_orte_component.server; - } - - OPAL_OUTPUT_VERBOSE((1, ompi_pubsub_base_framework.framework_output, - "%s pubsub:orte: unpublish service %s scope %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - service_name, global_scope ? "Global" : "Local")); - - /* construct the buffer */ - buf = OBJ_NEW(opal_buffer_t); - - /* pack the unpublish command */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(buf, &cmd, 1, ORTE_DATA_SERVER_CMD))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buf); - goto CLEANUP; - } - - /* pack the service name */ - if (OPAL_SUCCESS != (rc = opal_dss.pack(buf, &service_name, 1, OPAL_STRING))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buf); - goto CLEANUP; - } - - /* send the command */ - if (0 > (rc = orte_rml.send_buffer_nb(info_host, buf, ORTE_RML_TAG_DATA_SERVER, - orte_rml_send_callback, NULL))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(buf); - goto CLEANUP; - } - - /* get the answer */ - OBJ_CONSTRUCT(&xfer, orte_rml_recv_cb_t); - xfer.active = true; - orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_DATA_CLIENT, - ORTE_RML_NON_PERSISTENT, - orte_rml_recv_callback, &xfer); - OMPI_WAIT_FOR_COMPLETION(xfer.active); - - /* unpack the result */ - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(&xfer.data, &ret, &cnt, OPAL_INT))) { - ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(&xfer); - goto CLEANUP; - } - OBJ_DESTRUCT(&xfer); - rc = ret; - -CLEANUP: - return rc; -} - - -/* - * finalize the module - */ -static int finalize(void) -{ - return OMPI_SUCCESS; -} - -/* - * instantiate the module - */ -ompi_pubsub_base_module_t ompi_pubsub_orte_module = { - init, - publish, - unpublish, - 
lookup, - finalize -}; - - diff --git a/ompi/mca/pubsub/orte/pubsub_orte.h b/ompi/mca/pubsub/orte/pubsub_orte.h deleted file mode 100644 index 0d7e116df1..0000000000 --- a/ompi/mca/pubsub/orte/pubsub_orte.h +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University. - * All rights reserved. - * Copyright (c) 2004-2006 The Trustees of the University of Tennessee. - * All rights reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2007 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef OMPI_PUBSUB_ORTE_H -#define OMPI_PUBSUB_ORTE_H - -#include "ompi_config.h" - -#include "orte/types.h" - -#include "ompi/mca/pubsub/pubsub.h" - -BEGIN_C_DECLS - -/* - * Extend the pubsub component to hold some useful - * values for this component - */ -typedef struct { - ompi_pubsub_base_component_t super; - orte_process_name_t server; - char *server_uri; - bool server_found; -} ompi_pubsub_orte_component_t; - -/* access to module */ -extern ompi_pubsub_base_module_t ompi_pubsub_orte_module; - -/* access to component so we can get to the locally - * global values - */ -OMPI_MODULE_DECLSPEC extern ompi_pubsub_orte_component_t mca_pubsub_orte_component; - -END_C_DECLS - -#endif /* OMPI_PUBSUB_ORTE_H */ diff --git a/ompi/mca/pubsub/orte/pubsub_orte_component.c b/ompi/mca/pubsub/orte/pubsub_orte_component.c deleted file mode 100644 index 4aaaf126ea..0000000000 --- a/ompi/mca/pubsub/orte/pubsub_orte_component.c +++ /dev/null @@ -1,95 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. 
- * Copyright (c) 2004-2011 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2006 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" -#include "ompi/constants.h" - -#include "pubsub_orte.h" - -static int pubsub_orte_component_register(void); -static int pubsub_orte_component_open(void); -static int pubsub_orte_component_close(void); -static int pubsub_orte_component_query(mca_base_module_t **module, int *priority); - -static int my_priority = 50; - -ompi_pubsub_orte_component_t mca_pubsub_orte_component = { - { - /* First, the mca_base_component_t struct containing meta - information about the component itself */ - - .base_version = { - OMPI_PUBSUB_BASE_VERSION_2_0_0, - - .mca_component_name = "orte", - MCA_BASE_MAKE_VERSION(component, OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION, - OMPI_RELEASE_VERSION), - .mca_open_component = pubsub_orte_component_open, - .mca_close_component = pubsub_orte_component_close, - .mca_query_component = pubsub_orte_component_query, - .mca_register_component_params = pubsub_orte_component_register, - }, - .base_data = { - /* This component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, - } -}; - -static int pubsub_orte_component_register(void) -{ - my_priority = 50; - (void) mca_base_component_var_register(&mca_pubsub_orte_component.super.base_version, - "priority", "Priority of the pubsub pmi component", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &my_priority); - - mca_pubsub_orte_component.server_uri = NULL; - (void) 
mca_base_component_var_register(&mca_pubsub_orte_component.super.base_version, - "server", "Contact info for ompi_server for publish/subscribe operations", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &mca_pubsub_orte_component.server_uri); - - return OMPI_SUCCESS; -} - -static int pubsub_orte_component_open(void) -{ - return OMPI_SUCCESS; -} - -static int pubsub_orte_component_close(void) -{ - return OMPI_SUCCESS; -} - -static int pubsub_orte_component_query(mca_base_module_t **module, int *priority) -{ - mca_pubsub_orte_component.server_found = false; - - *priority = my_priority; - *module = (mca_base_module_t *) &ompi_pubsub_orte_module; - return OMPI_SUCCESS; -} diff --git a/ompi/mca/pubsub/pmi/Makefile.am b/ompi/mca/pubsub/pmi/Makefile.am deleted file mode 100644 index d839e64a94..0000000000 --- a/ompi/mca/pubsub/pmi/Makefile.am +++ /dev/null @@ -1,40 +0,0 @@ -# -# Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2014 Intel, Inc. All rights reserved -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -AM_CPPFLAGS = $(pubsub_pmi_CPPFLAGS) - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). 
- -if MCA_BUILD_ompi_pubsub_pmi_DSO -component_noinst = -component_install = mca_pubsub_pmi.la -else -component_noinst = libmca_pubsub_pmi.la -component_install = -endif - -local_sources = \ - pubsub_pmi.c \ - pubsub_pmi.h \ - pubsub_pmi_component.c - -mcacomponentdir = $(ompilibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_pubsub_pmi_la_SOURCES = $(local_sources) -mca_pubsub_pmi_la_LDFLAGS = -module -avoid-version $(pubsub_pmi_LDFLAGS) -mca_pubsub_pmi_la_LIBADD = $(pubsub_pmi_LIBS) - -noinst_LTLIBRARIES = $(component_noinst) -libmca_pubsub_pmi_la_SOURCES = $(local_sources) -libmca_pubsub_pmi_la_LIBADD = $(pubsub_pmi_LIBS) -libmca_pubsub_pmi_la_LDFLAGS = -module -avoid-version $(pubsub_pmi_LDFLAGS) - diff --git a/ompi/mca/pubsub/pmi/configure.m4 b/ompi/mca/pubsub/pmi/configure.m4 deleted file mode 100644 index 3ec0f33821..0000000000 --- a/ompi/mca/pubsub/pmi/configure.m4 +++ /dev/null @@ -1,27 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# -# MCA_ompi_pubsub_pmi_CONFIG([action-if-found], [action-if-not-found]) -# ----------------------------------------------------------- -AC_DEFUN([MCA_ompi_pubsub_pmi_CONFIG], [ - AC_CONFIG_FILES([ompi/mca/pubsub/pmi/Makefile]) - - OPAL_CHECK_PMI([pubsub_pmi], [pubsub_pmi_good=1], [pubsub_pmi_good=0]) - - # Evaluate succeed / fail - AS_IF([test "$pubsub_pmi_good" = 1], - [$1], - [$2]) - - # set build flags to use in makefile - AC_SUBST([pubsub_pmi_CPPFLAGS]) - AC_SUBST([pubsub_pmi_LDFLAGS]) - AC_SUBST([pubsub_pmi_LIBS]) - -]) diff --git a/ompi/mca/pubsub/pmi/owner.txt b/ompi/mca/pubsub/pmi/owner.txt deleted file mode 100644 index 4ad6f408ca..0000000000 --- a/ompi/mca/pubsub/pmi/owner.txt +++ /dev/null @@ -1,7 +0,0 @@ -# -# owner/status file -# owner: institution that is responsible for this package -# status: e.g. 
active, maintenance, unmaintained -# -owner: INTEL -status: maintenance diff --git a/ompi/mca/pubsub/pmi/pubsub_pmi.c b/ompi/mca/pubsub/pmi/pubsub_pmi.c deleted file mode 100644 index 09de30bceb..0000000000 --- a/ompi/mca/pubsub/pmi/pubsub_pmi.c +++ /dev/null @@ -1,128 +0,0 @@ -/* - * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2011 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" -#include "ompi/constants.h" - - -#include "opal/mca/pmix/pmix.h" - -#include "ompi/info/info.h" -#include "ompi/mca/rte/rte.h" -#include "ompi/mca/pubsub/base/base.h" -#include "pubsub_pmi.h" - -/* - * Init the module - */ -static int init(void) -{ - // did the pmix.init in the component - return OMPI_SUCCESS; -} - -/* - * publish the port_name for the specified service_name. - */ -static int publish(const char *service_name, ompi_info_t *info, const char *port_name) -{ - pmix_info_t *p; - opal_list_t xfer; - ompi_info_entry_t *ie; - int rc; - - /* transfer the ompi_info_t data to an array of pmix_info_t structs */ - OBJ_CONSTRUCT(&xfer, opal_list_t); - OPAL_LIST_FOREACH(ie, &info->super, ompi_info_entry_t) { - p = OBJ_NEW(pmix_info_t); - strncpy(p->key, ie->ie_key, PMIX_MAX_INFO_KEY); - strncpy(p->value, ie->ie_value, PMIX_MAX_INFO_VAL); - opal_list_append(&xfer, &p->super); - } - - rc = opal_pmix.publish(service_name, &xfer, port_name); - OPAL_LIST_DESTRUCT(&xfer); - return rc; -} - -static char* lookup(const char *service_name, ompi_info_t *info) -{ - char port[PMIX_MAX_VALLEN], *ret; - pmix_info_t *p; - opal_list_t xfer; - ompi_info_entry_t *ie; - int rc; - - /* transfer the ompi_info_t data to an array of pmix_info_t structs */ - OBJ_CONSTRUCT(&xfer, opal_list_t); - OPAL_LIST_FOREACH(ie, &info->super, ompi_info_entry_t) { - p = OBJ_NEW(pmix_info_t); - strncpy(p->key, ie->ie_key, 
PMIX_MAX_INFO_KEY); - strncpy(p->value, ie->ie_value, PMIX_MAX_INFO_VAL); - opal_list_append(&xfer, &p->super); - } - rc = opal_pmix.lookup(service_name, &xfer, port, PMIX_MAX_VALLEN); - OPAL_LIST_DESTRUCT(&xfer); - - /* in error case port will be set to NULL - * this is what our callers expect to see - * In future maybe some error handling need? - */ - if( rc != OPAL_SUCCESS ){ - // improve error processing - return NULL; - } - ret = strdup(port); - return ret; -} - -/* - * delete the entry */ -static int unpublish(const char *service_name, ompi_info_t *info) -{ - pmix_info_t *p; - opal_list_t xfer; - ompi_info_entry_t *ie; - int rc; - - /* transfer the ompi_info_t data to an array of pmix_info_t structs */ - OBJ_CONSTRUCT(&xfer, opal_list_t); - OPAL_LIST_FOREACH(ie, &info->super, ompi_info_entry_t) { - p = OBJ_NEW(pmix_info_t); - strncpy(p->key, ie->ie_key, PMIX_MAX_INFO_KEY); - strncpy(p->value, ie->ie_value, PMIX_MAX_INFO_VAL); - opal_list_append(&xfer, &p->super); - } - rc = opal_pmix.unpublish(service_name, &xfer); - OPAL_LIST_DESTRUCT(&xfer); - return rc; -} - - -/* - * finalize the module - */ -static int finalize(void) -{ - return OMPI_SUCCESS; -} - -/* - * instantiate the module - */ -ompi_pubsub_base_module_t ompi_pubsub_pmi_module = { - init, - publish, - unpublish, - lookup, - finalize -}; diff --git a/ompi/mca/pubsub/pmi/pubsub_pmi.h b/ompi/mca/pubsub/pmi/pubsub_pmi.h deleted file mode 100644 index a2556c7c65..0000000000 --- a/ompi/mca/pubsub/pmi/pubsub_pmi.h +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef OMPI_PUBSUB_PMI_H -#define OMPI_PUBSUB_PMI_H - -#include "ompi/mca/pubsub/pubsub.h" - -BEGIN_C_DECLS - -/* access to module */ -extern ompi_pubsub_base_module_t ompi_pubsub_pmi_module; - -/* access to component */ -OMPI_MODULE_DECLSPEC extern ompi_pubsub_base_component_t mca_pubsub_pmi_component; - -END_C_DECLS - -#endif /* OMPI_PUBSUB_PMI_H */ diff --git a/ompi/mca/pubsub/pmi/pubsub_pmi_component.c b/ompi/mca/pubsub/pmi/pubsub_pmi_component.c deleted file mode 100644 index 69899afe48..0000000000 --- a/ompi/mca/pubsub/pmi/pubsub_pmi_component.c +++ /dev/null @@ -1,90 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2011 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "ompi_config.h" - -#include "opal/runtime/opal_params.h" -#include "opal/mca/pmix/pmix.h" - -#include "ompi/constants.h" -#include "ompi/mca/rte/rte.h" - -#include "pubsub_pmi.h" - -static int pubsub_pmi_component_register(void); -static int pubsub_pmi_component_open(void); -static int pubsub_pmi_component_close(void); -static int pubsub_pmi_component_query(mca_base_module_t **module, int *priority); - -static int my_priority = 100; /* must be above "orte" component */ - -ompi_pubsub_base_component_t mca_pubsub_pmi_component = { - .base_version = { - OMPI_PUBSUB_BASE_VERSION_2_0_0, - - .mca_component_name = "pmi", - MCA_BASE_MAKE_VERSION(component, OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION, - OMPI_RELEASE_VERSION), - .mca_open_component = pubsub_pmi_component_open, - .mca_close_component = pubsub_pmi_component_close, - .mca_query_component = pubsub_pmi_component_query, - .mca_register_component_params = pubsub_pmi_component_register, - }, - 
.base_data = { - /* This component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, -}; - -static int pubsub_pmi_component_register(void) -{ - my_priority = 100; - (void) mca_base_component_var_register(&mca_pubsub_pmi_component.base_version, - "priority", "Priority of the pubsub pmi component", - MCA_BASE_VAR_TYPE_INT, NULL, 0, 0, - OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, - &my_priority); - - return OMPI_SUCCESS; -} - -static int pubsub_pmi_component_open(void) -{ - return OMPI_SUCCESS; -} - -static int pubsub_pmi_component_close(void) -{ - if (NULL != opal_pmix.finalize) { - opal_pmix.finalize(); - } - return OMPI_SUCCESS; -} - -static int pubsub_pmi_component_query(mca_base_module_t **module, int *priority) -{ - if (NULL != opal_pmix.init) { - - if (OPAL_SUCCESS == opal_pmix.init()) { - *priority = my_priority; - *module = (mca_base_module_t *)&ompi_pubsub_pmi_module; - return OMPI_SUCCESS; - } - } - - /* we can't run */ - *priority = -1; - *module = NULL; - return OMPI_ERROR; -} diff --git a/ompi/mca/pubsub/pubsub.h b/ompi/mca/pubsub/pubsub.h deleted file mode 100644 index 434d66c80f..0000000000 --- a/ompi/mca/pubsub/pubsub.h +++ /dev/null @@ -1,108 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights - * reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - * - * Dynamic Process Management Interface - * - */ - -#ifndef OMPI_MCA_PUBSUB_H -#define OMPI_MCA_PUBSUB_H - -#include "ompi_config.h" - -#include "ompi/mca/mca.h" -#include "opal/mca/base/base.h" - - -#include "ompi/info/info.h" - -BEGIN_C_DECLS - -/* - * Initialize a module - */ -typedef int (*ompi_pubsub_base_module_init_fn_t)(void); - -/* - * Publish a data item - */ -typedef int (*ompi_pubsub_base_module_publish_fn_t)(const char *service, ompi_info_t *info, const char *port); - -/* - * Unpublish a data item - */ -typedef int (*ompi_pubsub_base_module_unpublish_fn_t)(const char *service, ompi_info_t *info); - -/* - * Lookup a data item - */ -typedef char* (*ompi_pubsub_base_module_lookup_fn_t)(const char *service, ompi_info_t *info); - -/* - * Finalize a module - */ -typedef int (*ompi_pubsub_base_module_finalize_fn_t)(void); - -/** -* Structure for PUBSUB modules - */ -struct ompi_pubsub_base_module_1_0_0_t { - /** Initialization Function */ - ompi_pubsub_base_module_init_fn_t init; - /* Publish */ - ompi_pubsub_base_module_publish_fn_t publish; - /* Unpublish */ - ompi_pubsub_base_module_unpublish_fn_t unpublish; - /* Lookup */ - ompi_pubsub_base_module_lookup_fn_t lookup; - /* finalize */ - ompi_pubsub_base_module_finalize_fn_t finalize; -}; -typedef struct ompi_pubsub_base_module_1_0_0_t ompi_pubsub_base_module_1_0_0_t; -typedef struct ompi_pubsub_base_module_1_0_0_t ompi_pubsub_base_module_t; - -OMPI_DECLSPEC extern ompi_pubsub_base_module_t ompi_pubsub; - - -/** - * Structure for PUBSUB components. 
- */ -struct ompi_pubsub_base_component_2_0_0_t { - /** MCA base component */ - mca_base_component_t base_version; - /** MCA base data */ - mca_base_component_data_t base_data; -}; -typedef struct ompi_pubsub_base_component_2_0_0_t ompi_pubsub_base_component_2_0_0_t; -typedef struct ompi_pubsub_base_component_2_0_0_t ompi_pubsub_base_component_t; - -/** - * Macro for use in components that are of type PUBSUB - */ -#define OMPI_PUBSUB_BASE_VERSION_2_0_0 \ - OMPI_MCA_BASE_VERSION_2_1_0("pubsub", 2, 0, 0) - - -END_C_DECLS - -#endif /* OMPI_MCA_PUBSUB_H */ diff --git a/ompi/mca/rte/orte/rte_orte.h b/ompi/mca/rte/orte/rte_orte.h index 4a674b8d89..7796204bf6 100644 --- a/ompi/mca/rte/orte/rte_orte.h +++ b/ompi/mca/rte/orte/rte_orte.h @@ -1,7 +1,7 @@ /* * Copyright (c) 2012-2013 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2013-2014 Intel, Inc. All rights reserved + * Copyright (c) 2013-2015 Intel, Inc. All rights reserved * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
@@ -50,6 +50,8 @@ typedef orte_ns_cmp_bitmask_t ompi_rte_cmp_bitmask_t; #define OMPI_PROC_MY_NAME ORTE_PROC_MY_NAME #define OMPI_NAME_PRINT(a) ORTE_NAME_PRINT((const orte_process_name_t*)a) #define ompi_rte_compare_name_fields(a, b, c) orte_util_compare_name_fields(a, (const orte_process_name_t*)(b), (const orte_process_name_t*)(c)) +#define ompi_rte_convert_string_to_process_name(a,b) orte_util_convert_string_to_process_name(a,b) +#define ompi_rte_convert_process_name_to_string(a,b) orte_util_convert_process_name_to_string(a,b) #define OMPI_NAME_WILDCARD ORTE_NAME_WILDCARD #define OMPI_NODE_RANK_INVALID ORTE_NODE_RANK_INVALID #define OMPI_LOCAL_RANK_INVALID ORTE_LOCAL_RANK_INVALID diff --git a/ompi/mca/rte/orte/rte_orte_module.c b/ompi/mca/rte/orte/rte_orte_module.c index 61c45cf5fd..c82b25ce40 100644 --- a/ompi/mca/rte/orte/rte_orte_module.c +++ b/ompi/mca/rte/orte/rte_orte_module.c @@ -1,7 +1,7 @@ /* * Copyright (c) 2012-2013 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2013-2014 Intel, Inc. All rights reserved + * Copyright (c) 2013-2015 Intel, Inc. All rights reserved * Copyright (c) 2012-2014 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. @@ -19,7 +19,6 @@ #include "opal/util/argv.h" #include "opal/util/proc.h" #include "opal/util/opal_getcwd.h" -#include "opal/mca/dstore/dstore.h" #include "opal/mca/pmix/pmix.h" #include "opal/threads/threads.h" #include "opal/class/opal_list.h" diff --git a/ompi/mca/topo/treematch/topo_treematch_dist_graph_create.c b/ompi/mca/topo/treematch/topo_treematch_dist_graph_create.c index c275053b16..0cf79622b3 100644 --- a/ompi/mca/topo/treematch/topo_treematch_dist_graph_create.c +++ b/ompi/mca/topo/treematch/topo_treematch_dist_graph_create.c @@ -4,6 +4,7 @@ * reserved. * Copyright (c) 2011-2015 INRIA. All rights reserved. * Copyright (c) 2012-2015 Bordeaux Poytechnic Institute + * Copyright (c) 2015 Intel, Inc. 
All rights reserved * $COPYRIGHT$ * * Additional copyrights may follow @@ -27,7 +28,7 @@ #include "ompi/mca/pml/pml.h" -#include "opal/mca/dstore/dstore.h" +#include "opal/mca/pmix/pmix.h" #define ERR_EXIT(ERR) \ do { free(local_pattern); \ @@ -178,7 +179,7 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module, for(i = 0; i < size ; i++) { proc = ompi_group_peer_lookup(comm_old->c_local_group, i); pval = &val; - OPAL_MODEX_RECV_VALUE(err, OPAL_DSTORE_NODEID, &(proc->super), &pval, OPAL_UINT32); + OPAL_MODEX_RECV_VALUE(err, OPAL_PMIX_NODEID, &(proc->super.proc_name), &pval, OPAL_UINT32); if( OPAL_SUCCESS != err ) { opal_output(0, "Unable to extract peer %s nodeid from the modex.\n", OMPI_NAME_PRINT(&(proc->super))); @@ -863,20 +864,20 @@ int mca_topo_treematch_dist_graph_create(mca_topo_base_module_t* topo_module, #endif OBJ_CONSTRUCT(&kv, opal_value_t); - kv.key = strdup(OPAL_DSTORE_CPUSET); + kv.key = strdup(OPAL_PMIX_CPUSET); kv.type = OPAL_STRING; kv.data.string = strdup(set_as_string); - (void)opal_dstore.store(opal_dstore_internal, (opal_process_name_t*)ORTE_PROC_MY_NAME, &kv); + (void)opal_pmix.store_local((opal_process_name_t*)ORTE_PROC_MY_NAME, &kv); OBJ_DESTRUCT(&kv); locality = opal_hwloc_base_get_relative_locality(opal_hwloc_topology, orte_process_info.cpuset,set_as_string); OBJ_CONSTRUCT(&kv, opal_value_t); - kv.key = strdup(OPAL_DSTORE_LOCALITY); + kv.key = strdup(OPAL_PMIX_LOCALITY); kv.type = OPAL_UINT16; kv.data.uint16 = locality; - (void)opal_dstore.store(opal_dstore_internal, (opal_process_name_t*)ORTE_PROC_MY_NAME, &kv); + (void)opal_pmix.store_local((opal_process_name_t*)ORTE_PROC_MY_NAME, &kv); OBJ_DESTRUCT(&kv); if( OMPI_SUCCESS != (err = ompi_comm_create(comm_old, diff --git a/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_eventlog.c b/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_eventlog.c index 154312db32..a063fde633 100644 --- a/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_eventlog.c +++ 
b/ompi/mca/vprotocol/pessimist/vprotocol_pessimist_eventlog.c @@ -3,6 +3,7 @@ * All rights reserved. * Copyright (c) 2012 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -12,55 +13,35 @@ #include "ompi_config.h" #include "vprotocol_pessimist_eventlog.h" - -#include "ompi/mca/dpm/dpm.h" -#include "ompi/mca/pubsub/pubsub.h" +#include "opal/mca/pmix/pmix.h" +#include "ompi/dpm/dpm.h" int vprotocol_pessimist_event_logger_connect(int el_rank, ompi_communicator_t **el_comm) { int rc; - opal_buffer_t *buffer; char *port; - ompi_process_name_t el_proc; - char *hnp_uri, *rml_uri; - ompi_rml_tag_t el_tag; - char name[MPI_MAX_PORT_NAME]; int rank; vprotocol_pessimist_clock_t connect_info[2]; + opal_list_t results; + opal_pmix_pdata_t *pdat; - snprintf(name, MPI_MAX_PORT_NAME, VPROTOCOL_EVENT_LOGGER_NAME_FMT, el_rank); - port = ompi_pubsub.lookup(name, MPI_INFO_NULL); - if(NULL == port) - { + OBJ_CONSTRUCT(&results, opal_list_t); + pdat = OBJ_NEW(opal_pmix_pdata_t); + asprintf(&pdat->key, VPROTOCOL_EVENT_LOGGER_NAME_FMT, el_rank); + opal_list_append(&results, &pdat->super); + + rc = opal_pmix.lookup(OPAL_PMIX_NAMESPACE, &results); + if (OPAL_SUCCESS != rc || + OPAL_STRING != pdat->value.type || + NULL == pdat->value.data.string) { + OPAL_LIST_DESTRUCT(&results); return OMPI_ERR_NOT_FOUND; } + port = strdup(pdat->value.data.string); + OPAL_LIST_DESTRUCT(&results); V_OUTPUT_VERBOSE(45, "Found port < %s >", port); - /* separate the string into the HNP and RML URI and tag */ - if (OMPI_SUCCESS != (rc = ompi_dpm.parse_port(port, &hnp_uri, &rml_uri, &el_tag))) { - OMPI_ERROR_LOG(rc); - return rc; - } - /* extract the originating proc's name */ - if (OMPI_SUCCESS != (rc = ompi_rte_parse_uris(rml_uri, &el_proc, NULL))) { - OMPI_ERROR_LOG(rc); - free(rml_uri); free(hnp_uri); - return rc; - } - /* make sure we can route rml messages to the destination */ - if 
(OMPI_SUCCESS != (rc = ompi_dpm.route_to_port(hnp_uri, &el_proc))) { - OMPI_ERROR_LOG(rc); - free(rml_uri); free(hnp_uri); - return rc; - } - free(rml_uri); free(hnp_uri); - - /* Send an rml message to tell the remote end to wake up and jump into - * connect/accept */ - buffer = OBJ_NEW(opal_buffer_t); - ompi_rte_send_buffer_nb(&el_proc, buffer, el_tag+1, NULL, NULL); - - rc = ompi_dpm.connect_accept(MPI_COMM_SELF, 0, port, true, el_comm); + rc = ompi_dpm_connect_accept(MPI_COMM_SELF, 0, port, true, el_comm); if(OMPI_SUCCESS != rc) { OMPI_ERROR_LOG(rc); } @@ -86,7 +67,7 @@ int vprotocol_pessimist_event_logger_connect(int el_rank, ompi_communicator_t ** int vprotocol_pessimist_event_logger_disconnect(ompi_communicator_t *el_comm) { - ompi_dpm.disconnect(el_comm); + ompi_dpm_disconnect(el_comm); return OMPI_SUCCESS; } diff --git a/ompi/mpi/c/close_port.c b/ompi/mpi/c/close_port.c index 8aab0c5d63..3ff3fe855c 100644 --- a/ompi/mpi/c/close_port.c +++ b/ompi/mpi/c/close_port.c @@ -12,6 +12,7 @@ * All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -25,7 +26,7 @@ #include "ompi/runtime/params.h" #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" -#include "ompi/mca/dpm/dpm.h" +#include "ompi/dpm/dpm.h" #if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES @@ -53,7 +54,7 @@ int MPI_Close_port(const char *port_name) FUNC_NAME); } - ret = ompi_dpm.close_port(port_name); + ret = ompi_dpm_close_port(port_name); OMPI_ERRHANDLER_RETURN(ret, MPI_COMM_WORLD, ret, FUNC_NAME); } diff --git a/ompi/mpi/c/comm_accept.c b/ompi/mpi/c/comm_accept.c index 41c773f91f..5a5310317c 100644 --- a/ompi/mpi/c/comm_accept.c +++ b/ompi/mpi/c/comm_accept.c @@ -14,6 +14,7 @@ * Copyright (c) 2008 University of Houston, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. 
All rights * reserved. + * Copyright (c) 2015 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -28,7 +29,7 @@ #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" #include "ompi/info/info.h" -#include "ompi/mca/dpm/dpm.h" +#include "ompi/dpm/dpm.h" #include "ompi/memchecker.h" #if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES @@ -95,11 +96,11 @@ int MPI_Comm_accept(const char *port_name, MPI_Info info, int root, OPAL_CR_ENTER_LIBRARY(); if ( rank == root ) { - rc = ompi_dpm.connect_accept (comm, root, port_name, send_first, + rc = ompi_dpm_connect_accept (comm, root, port_name, send_first, &newcomp); } else { - rc = ompi_dpm.connect_accept (comm, root, NULL, send_first, + rc = ompi_dpm_connect_accept (comm, root, NULL, send_first, &newcomp); } diff --git a/ompi/mpi/c/comm_connect.c b/ompi/mpi/c/comm_connect.c index b3beec6bf3..b7cbc33840 100644 --- a/ompi/mpi/c/comm_connect.c +++ b/ompi/mpi/c/comm_connect.c @@ -14,6 +14,7 @@ * Copyright (c) 2008 University of Houston. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -28,7 +29,7 @@ #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" #include "ompi/info/info.h" -#include "ompi/mca/dpm/dpm.h" +#include "ompi/dpm/dpm.h" #include "ompi/memchecker.h" #if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES @@ -97,11 +98,11 @@ int MPI_Comm_connect(const char *port_name, MPI_Info info, int root, OPAL_CR_ENTER_LIBRARY(); if ( rank == root ) { - rc = ompi_dpm.connect_accept (comm, root, port_name, send_first, + rc = ompi_dpm_connect_accept (comm, root, port_name, send_first, &newcomp); } else { - rc = ompi_dpm.connect_accept (comm, root, NULL, send_first, + rc = ompi_dpm_connect_accept (comm, root, NULL, send_first, &newcomp); } diff --git a/ompi/mpi/c/comm_disconnect.c b/ompi/mpi/c/comm_disconnect.c index d623d8f689..5fb92be285 100644 --- a/ompi/mpi/c/comm_disconnect.c +++ b/ompi/mpi/c/comm_disconnect.c @@ -10,6 +10,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -33,7 +34,7 @@ #include "ompi/mpi/c/profile/defines.h" #endif -#include "ompi/mca/dpm/dpm.h" +#include "ompi/dpm/dpm.h" static const char FUNC_NAME[] = "MPI_Comm_disconnect"; @@ -62,7 +63,7 @@ int MPI_Comm_disconnect(MPI_Comm *comm) OPAL_CR_ENTER_LIBRARY(); if ( OMPI_COMM_IS_DYNAMIC(*comm)) { - if (OMPI_SUCCESS != ompi_dpm.disconnect (*comm)) { + if (OMPI_SUCCESS != ompi_dpm_disconnect (*comm)) { ret = OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_COMM, FUNC_NAME); } } diff --git a/ompi/mpi/c/comm_join.c b/ompi/mpi/c/comm_join.c index b3fe0cf588..d1493f8eb0 100644 --- a/ompi/mpi/c/comm_join.c +++ b/ompi/mpi/c/comm_join.c @@ -38,7 +38,7 @@ #include "ompi/runtime/params.h" #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" -#include "ompi/mca/dpm/dpm.h" +#include "ompi/dpm/dpm.h" #if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES @@ -59,7 +59,6 @@ int MPI_Comm_join(int fd, MPI_Comm *intercomm) int rc; uint32_t len, rlen, llen, lrlen; int send_first=0; - char *rport; ompi_process_name_t rname, tmp_name; ompi_communicator_t *newcomp; @@ -76,12 +75,6 @@ int MPI_Comm_join(int fd, MPI_Comm *intercomm) OPAL_CR_ENTER_LIBRARY(); - /* open a port using the specified tag */ - if (OMPI_SUCCESS != (rc = ompi_dpm.open_port(port_name, OMPI_COMM_JOIN_TAG))) { - OPAL_CR_EXIT_LIBRARY(); - return rc; - } - /* send my process name */ tmp_name = *OMPI_PROC_MY_NAME; OMPI_PROCESS_NAME_HTON(tmp_name); @@ -107,34 +100,30 @@ int MPI_Comm_join(int fd, MPI_Comm *intercomm) send_first = true; } - /* sendrecv port-name through the socket connection. - Need to determine somehow how to avoid a potential deadlock - here. 
*/ - llen = (uint32_t)(strlen(port_name)+1); - len = htonl(llen); - - ompi_socket_send( fd, (char *) &len, sizeof(uint32_t)); - ompi_socket_recv (fd, (char *) &rlen, sizeof(uint32_t)); - - lrlen = ntohl(rlen); - rport = (char *) malloc (lrlen); - if ( NULL == rport ) { - *intercomm = MPI_COMM_NULL; - OPAL_CR_EXIT_LIBRARY(); - return MPI_ERR_INTERN; - } - /* Assumption: socket_send should not block, even if the socket is not configured to be non-blocking, because the message length are so short. */ - ompi_socket_send (fd, port_name, llen); - ompi_socket_recv (fd, rport, lrlen); - /* use the port we received to connect/accept */ - rc = ompi_dpm.connect_accept (MPI_COMM_SELF, 0, rport, send_first, &newcomp); + /* we will only use the send_first proc's port name, + * so pass it to the recv_first participant */ + if (send_first) { + /* open a port */ + if (OMPI_SUCCESS != (rc = ompi_dpm_open_port(port_name))) { + OPAL_CR_EXIT_LIBRARY(); + return rc; + } + llen = (uint32_t)(strlen(port_name)+1); + len = htonl(llen); + ompi_socket_send( fd, (char *) &len, sizeof(uint32_t)); + ompi_socket_send (fd, port_name, llen); + } else { + ompi_socket_recv (fd, (char *) &rlen, sizeof(uint32_t)); + lrlen = ntohl(rlen); + ompi_socket_recv (fd, port_name, lrlen); + } - - free ( rport ); + /* use the port to connect/accept */ + rc = ompi_dpm_connect_accept (MPI_COMM_SELF, 0, port_name, send_first, &newcomp); *intercomm = newcomp; OMPI_ERRHANDLER_RETURN (rc, MPI_COMM_SELF, rc, FUNC_NAME); diff --git a/ompi/mpi/c/comm_spawn.c b/ompi/mpi/c/comm_spawn.c index 6cb0fb2de4..0ea821ceda 100644 --- a/ompi/mpi/c/comm_spawn.c +++ b/ompi/mpi/c/comm_spawn.c @@ -14,6 +14,7 @@ * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -28,7 +29,7 @@ #include "ompi/runtime/params.h" #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" -#include "ompi/mca/dpm/dpm.h" +#include "ompi/dpm/dpm.h" #include "ompi/memchecker.h" #if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES @@ -109,7 +110,7 @@ int MPI_Comm_spawn(const char *command, char *argv[], int maxprocs, MPI_Info inf if (!non_mpi) { /* Open a port. The port_name is passed as an environment variable to the children. */ - if (OMPI_SUCCESS != (rc = ompi_dpm.open_port (port_name, OMPI_RML_TAG_INVALID))) { + if (OMPI_SUCCESS != (rc = ompi_dpm_open_port (port_name))) { goto error; } } else if (1 < ompi_comm_size(comm)) { @@ -117,7 +118,7 @@ int MPI_Comm_spawn(const char *command, char *argv[], int maxprocs, MPI_Info inf rc = OMPI_ERR_NOT_SUPPORTED; goto error; } - if (OMPI_SUCCESS != (rc = ompi_dpm.spawn (1, &command, &argv, &maxprocs, + if (OMPI_SUCCESS != (rc = ompi_dpm_spawn (1, &command, &argv, &maxprocs, &info, port_name))) { goto error; } @@ -126,7 +127,7 @@ int MPI_Comm_spawn(const char *command, char *argv[], int maxprocs, MPI_Info inf if (non_mpi) { newcomp = MPI_COMM_NULL; } else { - rc = ompi_dpm.connect_accept (comm, root, port_name, send_first, &newcomp); + rc = ompi_dpm_connect_accept (comm, root, port_name, send_first, &newcomp); } error: @@ -134,7 +135,7 @@ error: /* close the port */ if (rank == root && !non_mpi) { - ompi_dpm.close_port(port_name); + ompi_dpm_close_port(port_name); } /* set error codes */ diff --git a/ompi/mpi/c/comm_spawn_multiple.c b/ompi/mpi/c/comm_spawn_multiple.c index e46055d641..70cc696d17 100644 --- a/ompi/mpi/c/comm_spawn_multiple.c +++ b/ompi/mpi/c/comm_spawn_multiple.c @@ -14,6 +14,7 @@ * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -28,7 +29,7 @@ #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" #include "ompi/info/info.h" -#include "ompi/mca/dpm/dpm.h" +#include "ompi/dpm/dpm.h" #include "ompi/memchecker.h" #if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES @@ -149,7 +150,7 @@ int MPI_Comm_spawn_multiple(int count, char *array_of_commands[], char **array_o if (!non_mpi) { /* Open a port. The port_name is passed as an environment variable to the children. */ - if (OMPI_SUCCESS != (rc = ompi_dpm.open_port (port_name, OMPI_RML_TAG_INVALID))) { + if (OMPI_SUCCESS != (rc = ompi_dpm_open_port (port_name))) { goto error; } } else if (1 < ompi_comm_size(comm)) { @@ -157,7 +158,7 @@ int MPI_Comm_spawn_multiple(int count, char *array_of_commands[], char **array_o rc = OMPI_ERR_NOT_SUPPORTED; goto error; } - if (OMPI_SUCCESS != (rc = ompi_dpm.spawn(count, (const char **) array_of_commands, + if (OMPI_SUCCESS != (rc = ompi_dpm_spawn(count, (const char **) array_of_commands, array_of_argv, array_of_maxprocs, array_of_info, port_name))) { goto error; @@ -167,7 +168,7 @@ int MPI_Comm_spawn_multiple(int count, char *array_of_commands[], char **array_o if (non_mpi) { newcomp = MPI_COMM_NULL; } else { - rc = ompi_dpm.connect_accept (comm, root, port_name, send_first, &newcomp); + rc = ompi_dpm_connect_accept (comm, root, port_name, send_first, &newcomp); } error: @@ -175,7 +176,7 @@ error: /* close the port */ if (rank == root && !non_mpi) { - ompi_dpm.close_port(port_name); + ompi_dpm_close_port(port_name); } /* set array of errorcodes */ diff --git a/ompi/mpi/c/lookup_name.c b/ompi/mpi/c/lookup_name.c index 773af19e07..1613638bc7 100644 --- a/ompi/mpi/c/lookup_name.c +++ b/ompi/mpi/c/lookup_name.c @@ -12,6 +12,7 @@ * All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -21,12 +22,14 @@ #include "ompi_config.h" +#include "opal/class/opal_list.h" +#include "opal/mca/pmix/pmix.h" + #include "ompi/mpi/c/bindings.h" #include "ompi/runtime/params.h" #include "ompi/errhandler/errhandler.h" #include "ompi/info/info.h" #include "ompi/communicator/communicator.h" -#include "ompi/mca/pubsub/pubsub.h" #if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES #pragma weak MPI_Lookup_name = PMPI_Lookup_name @@ -41,7 +44,12 @@ static const char FUNC_NAME[] = "MPI_Lookup_name"; int MPI_Lookup_name(const char *service_name, MPI_Info info, char *port_name) { - char *tmp; + char range[OPAL_MAX_INFO_VAL]; + int flag=0, ret; + opal_pmix_data_range_t rng; + bool range_given = false; + opal_list_t results; + opal_pmix_pdata_t *pdat; if ( MPI_PARAM_CHECK ) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); @@ -62,22 +70,44 @@ int MPI_Lookup_name(const char *service_name, MPI_Info info, char *port_name) OPAL_CR_ENTER_LIBRARY(); - /* - * No predefined info-objects for this function in MPI-2, - * therefore, we do not parse the info-object at the moment. - */ + /* OMPI supports info keys to pass the range to + * be searched for the given key */ + if (MPI_INFO_NULL != info) { + ompi_info_get (info, "range", sizeof(range) - 1, range, &flag); + if (flag) { + range_given = true; + if (0 == strcmp(range, "nspace")) { + rng = OPAL_PMIX_NAMESPACE; // share only with procs in same nspace + } else if (0 == strcmp(range, "session")) { + rng = OPAL_PMIX_SESSION; // share only with procs in same session + } else { + /* unrecognized scope */ + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, + FUNC_NAME); + } + } + } + if (!range_given) { + /* default to nspace */ + rng = OPAL_PMIX_NAMESPACE; + } - /* - * if multiple entries found, this implementation uses - * at the moment the first entry. 
- */ - tmp = (char *) ompi_pubsub.lookup(service_name, info); - if ( NULL == tmp ) { + /* collect the findings */ + OBJ_CONSTRUCT(&results, opal_list_t); + pdat = OBJ_NEW(opal_pmix_pdata_t); + pdat->key = strdup(service_name); + opal_list_append(&results, &pdat->super); + + ret = opal_pmix.lookup(rng, &results); + if (OPAL_SUCCESS != ret || + OPAL_STRING != pdat->value.type || + NULL == pdat->value.data.string) { return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_NAME, FUNC_NAME); } - strncpy ( port_name, tmp, MPI_MAX_PORT_NAME ); + strncpy ( port_name, pdat->value.data.string, MPI_MAX_PORT_NAME ); + OPAL_LIST_DESTRUCT(&results); OPAL_CR_EXIT_LIBRARY(); return MPI_SUCCESS; diff --git a/ompi/mpi/c/open_port.c b/ompi/mpi/c/open_port.c index 0cf2f18b88..cc30116af5 100644 --- a/ompi/mpi/c/open_port.c +++ b/ompi/mpi/c/open_port.c @@ -9,6 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. + * Copyright (c) 2015 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -23,7 +24,7 @@ #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" #include "ompi/info/info.h" -#include "ompi/mca/dpm/dpm.h" +#include "ompi/dpm/dpm.h" #if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES #pragma weak MPI_Open_port = PMPI_Open_port @@ -66,7 +67,7 @@ int MPI_Open_port(MPI_Info info, char *port_name) OPAL_CR_ENTER_LIBRARY(); - rc = ompi_dpm.open_port(port_name, OMPI_RML_TAG_INVALID); + rc = ompi_dpm_open_port(port_name); OMPI_ERRHANDLER_RETURN(rc, MPI_COMM_WORLD, rc, FUNC_NAME); } diff --git a/ompi/mpi/c/publish_name.c b/ompi/mpi/c/publish_name.c index 9f08b96974..3ae6364dff 100644 --- a/ompi/mpi/c/publish_name.c +++ b/ompi/mpi/c/publish_name.c @@ -12,7 +12,8 @@ * All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. - * $COPYRIGHT$ + * Copyright (c) 2015 Intel, Inc. 
All rights reserved. +* $COPYRIGHT$ * * Additional copyrights may follow * @@ -21,12 +22,14 @@ #include "ompi_config.h" #include +#include "opal/class/opal_list.h" +#include "opal/mca/pmix/pmix.h" + #include "ompi/mpi/c/bindings.h" #include "ompi/runtime/params.h" #include "ompi/errhandler/errhandler.h" #include "ompi/info/info.h" #include "ompi/communicator/communicator.h" -#include "ompi/mca/pubsub/pubsub.h" #if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES #pragma weak MPI_Publish_name = PMPI_Publish_name @@ -43,6 +46,14 @@ int MPI_Publish_name(const char *service_name, MPI_Info info, const char *port_name) { int rc; + char range[OPAL_MAX_INFO_VAL]; + int flag=0; + opal_pmix_data_range_t rng; + bool range_given = false; + opal_pmix_persistence_t persist; + bool persistence_given = false; + opal_list_t values; + opal_pmix_info_t *pinfo; if ( MPI_PARAM_CHECK ) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); @@ -63,15 +74,62 @@ int MPI_Publish_name(const char *service_name, MPI_Info info, OPAL_CR_ENTER_LIBRARY(); - /* - * No predefined info-objects for this function in MPI-2, - * therefore, we do not parse the info-object at the moment. 
- */ + /* OMPI supports info keys to pass the range and persistence to + * be used for the given key */ + if (MPI_INFO_NULL != info) { + ompi_info_get (info, "range", sizeof(range) - 1, range, &flag); + if (flag) { + range_given = true; + if (0 == strcmp(range, "nspace")) { + rng = OPAL_PMIX_NAMESPACE; // share only with procs in same nspace + } else if (0 == strcmp(range, "session")) { + rng = OPAL_PMIX_SESSION; // share only with procs in same session + } else { + /* unrecognized range */ + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, + FUNC_NAME); + } + } + ompi_info_get (info, "persistence", sizeof(range) - 1, range, &flag); + if (flag) { + persistence_given = true; + if (0 == strcmp(range, "indef")) { + persist = OPAL_PMIX_PERSIST_INDEF; // retain until specifically deleted + } else if (0 == strcmp(range, "proc")) { + persist = OPAL_PMIX_PERSIST_PROC; // retain until publishing process terminates + } else if (0 == strcmp(range, "app")) { + persist = OPAL_PMIX_PERSIST_APP; // retain until application terminates + } else if (0 == strcmp(range, "session")) { + persist = OPAL_PMIX_PERSIST_SESSION; // retain until session/allocation terminates + } else { + /* unrecognized persistence */ + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, + FUNC_NAME); + } + } + } + if (!range_given) { + /* default to nspace */ + rng = OPAL_PMIX_NAMESPACE; + } + if (!persistence_given) { + persist = OPAL_PMIX_PERSIST_APP; + } + + /* publish the values */ + OBJ_CONSTRUCT(&values, opal_list_t); + pinfo = OBJ_NEW(opal_pmix_info_t); + pinfo->key = strdup(service_name); + pinfo->value.type = OPAL_STRING; + pinfo->value.data.string = strdup(port_name); + opal_list_append(&values, &pinfo->super); + + rc = opal_pmix.publish(rng, persist, &values); + OPAL_LIST_DESTRUCT(&values); - rc = ompi_pubsub.publish (service_name, info, port_name); OPAL_CR_EXIT_LIBRARY(); - if ( OMPI_SUCCESS != rc ) { - if (OMPI_EXISTS == rc) { + if ( OPAL_SUCCESS != rc ) { + if (OPAL_EXISTS == 
rc) { /* already exists - can't publish it */ return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_FILE_EXISTS, FUNC_NAME); diff --git a/ompi/mpi/c/unpublish_name.c b/ompi/mpi/c/unpublish_name.c index eee0c58140..2b60c04744 100644 --- a/ompi/mpi/c/unpublish_name.c +++ b/ompi/mpi/c/unpublish_name.c @@ -12,6 +12,7 @@ * All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2015 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -21,12 +22,15 @@ #include "ompi_config.h" #include +#include "opal/class/opal_list.h" +#include "opal/mca/pmix/pmix.h" +#include "opal/util/argv.h" + #include "ompi/mpi/c/bindings.h" #include "ompi/runtime/params.h" #include "ompi/communicator/communicator.h" #include "ompi/errhandler/errhandler.h" #include "ompi/info/info.h" -#include "ompi/mca/pubsub/pubsub.h" #if OPAL_HAVE_WEAK_SYMBOLS && OMPI_PROFILING_DEFINES #pragma weak MPI_Unpublish_name = PMPI_Unpublish_name @@ -43,6 +47,11 @@ int MPI_Unpublish_name(const char *service_name, MPI_Info info, const char *port_name) { int rc; + char range[OPAL_MAX_INFO_VAL]; + int flag=0; + opal_pmix_data_range_t rng; + bool range_given = false; + char **keys = NULL; if ( MPI_PARAM_CHECK ) { OMPI_ERR_INIT_FINALIZE(FUNC_NAME); @@ -63,19 +72,42 @@ int MPI_Unpublish_name(const char *service_name, MPI_Info info, OPAL_CR_ENTER_LIBRARY(); - /* - * No predefined info-objects for this function in MPI-2, - * therefore, we do not parse the info-object at the moment. 
- */ - rc = ompi_pubsub.unpublish(service_name, info); - if ( OMPI_SUCCESS != rc ) { - if (OMPI_ERR_NOT_FOUND == rc) { + /* OMPI supports info keys to pass the range to + * be searched for the given key */ + if (MPI_INFO_NULL != info) { + ompi_info_get (info, "range", sizeof(range) - 1, range, &flag); + if (flag) { + range_given = true; + if (0 == strcmp(range, "nspace")) { + rng = OPAL_PMIX_NAMESPACE; // share only with procs in same nspace + } else if (0 == strcmp(range, "session")) { + rng = OPAL_PMIX_SESSION; // share only with procs in same session + } else { + /* unrecognized range */ + return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ARG, + FUNC_NAME); + } + } + } + if (!range_given) { + /* default to nspace */ + rng = OPAL_PMIX_NAMESPACE; + } + + /* unpublish the service_name */ + opal_argv_append_nosize(&keys, service_name); + + rc = opal_pmix.unpublish(rng, keys); + opal_argv_free(keys); + + if ( OPAL_SUCCESS != rc ) { + if (OPAL_ERR_NOT_FOUND == rc) { /* service couldn't be found */ OPAL_CR_EXIT_LIBRARY(); return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_SERVICE, FUNC_NAME); } - if (OMPI_ERR_PERM == rc) { + if (OPAL_ERR_PERM == rc) { /* this process didn't own the specified service */ OPAL_CR_EXIT_LIBRARY(); return OMPI_ERRHANDLER_INVOKE(MPI_COMM_WORLD, MPI_ERR_ACCESS, diff --git a/ompi/proc/proc.c b/ompi/proc/proc.c index 99f3d03bd9..4701269c5f 100644 --- a/ompi/proc/proc.c +++ b/ompi/proc/proc.c @@ -12,7 +12,7 @@ * Copyright (c) 2006-2014 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2013-2014 Intel, Inc. All rights reserved + * Copyright (c) 2013-2015 Intel, Inc. All rights reserved * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -33,7 +33,6 @@ #include "opal/dss/dss.h" #include "opal/util/arch.h" #include "opal/util/show_help.h" -#include "opal/mca/dstore/dstore.h" #include "opal/mca/hwloc/base/base.h" #include "opal/mca/pmix/pmix.h" @@ -115,8 +114,8 @@ int ompi_proc_init(void) opal_proc_local_set(&proc->super); #if OPAL_ENABLE_HETEROGENEOUS_SUPPORT /* add our arch to the modex */ - OPAL_MODEX_SEND_VALUE(ret, PMIX_SYNC_REQD, PMIX_GLOBAL, - OPAL_DSTORE_ARCH, &opal_local_arch, OPAL_UINT32); + OPAL_MODEX_SEND_VALUE(ret, OPAL_PMIX_GLOBAL, + OPAL_PMIX_ARCH, &opal_local_arch, OPAL_UINT32); if (OPAL_SUCCESS != ret) { return ret; } @@ -141,29 +140,21 @@ int ompi_proc_complete_init(void) { ompi_proc_t *proc; int ret, errcode = OMPI_SUCCESS; - opal_list_t myvals; - opal_value_t *kv; + uint16_t u16, *u16ptr; OPAL_THREAD_LOCK(&ompi_proc_lock); + u16ptr = &u16; OPAL_LIST_FOREACH(proc, &ompi_proc_list, ompi_proc_t) { if (OMPI_CAST_RTE_NAME(&proc->super.proc_name)->vpid != OMPI_PROC_MY_NAME->vpid) { - /* get the locality information - do not use modex recv for - * this request as that will automatically cause the hostname - * to be loaded as well. All RTEs are required to provide this - * information at startup for procs on our node. Thus, not - * finding the info indicates that the proc is non-local. 
- */ - OBJ_CONSTRUCT(&myvals, opal_list_t); - if (OMPI_SUCCESS != (ret = opal_dstore.fetch(opal_dstore_internal, - &proc->super.proc_name, - OPAL_DSTORE_LOCALITY, &myvals))) { + /* get the locality information - all RTEs are required + * to provide this information at startup */ + OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_LOCALITY, &proc->super.proc_name, &u16ptr, OPAL_UINT16); + if (OPAL_SUCCESS != ret) { proc->super.proc_flags = OPAL_PROC_NON_LOCAL; } else { - kv = (opal_value_t*)opal_list_get_first(&myvals); - proc->super.proc_flags = kv->data.uint16; + proc->super.proc_flags = u16; } - OPAL_LIST_DESTRUCT(&myvals); if (ompi_process_info.num_procs < ompi_direct_modex_cutoff) { /* IF the number of procs falls below the specified cutoff, @@ -172,7 +163,7 @@ int ompi_proc_complete_init(void) * ALL modex info for this proc) will have no appreciable * impact on launch scaling */ - OPAL_MODEX_RECV_VALUE(ret, OPAL_DSTORE_HOSTNAME, (opal_proc_t*)&proc->super, + OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_HOSTNAME, &proc->super.proc_name, (char**)&(proc->super.proc_hostname), OPAL_STRING); if (OPAL_SUCCESS != ret) { errcode = ret; @@ -194,7 +185,7 @@ int ompi_proc_complete_init(void) { uint32_t *ui32ptr; ui32ptr = &(proc->super.proc_arch); - OPAL_MODEX_RECV_VALUE(ret, OPAL_DSTORE_ARCH, (opal_proc_t*)&proc->super, + OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_ARCH, &proc->super.proc_name, (void**)&ui32ptr, OPAL_UINT32); if (OPAL_SUCCESS == ret) { /* if arch is different than mine, create a new convertor for this proc */ @@ -393,8 +384,7 @@ int ompi_proc_refresh(void) opal_list_item_t *item = NULL; ompi_vpid_t i = 0; int ret=OMPI_SUCCESS; - opal_list_t myvals; - opal_value_t *kv; + uint16_t u16, *u16ptr; OPAL_THREAD_LOCK(&ompi_proc_lock); @@ -405,6 +395,7 @@ int ompi_proc_refresh(void) /* Does not change: proc->super.proc_name.vpid */ OMPI_CAST_RTE_NAME(&proc->super.proc_name)->jobid = OMPI_PROC_MY_NAME->jobid; + u16ptr = &u16; /* Make sure to clear the local flag before we set it below */ 
proc->super.proc_flags = 0; @@ -416,22 +407,14 @@ int ompi_proc_refresh(void) proc->super.proc_arch = opal_local_arch; opal_proc_local_set(&proc->super); } else { - /* get the locality information - do not use modex recv for - * this request as that will automatically cause the hostname - * to be loaded as well. All RTEs are required to provide this - * information at startup for procs on our node. Thus, not - * finding the info indicates that the proc is non-local. - */ - OBJ_CONSTRUCT(&myvals, opal_list_t); - if (OMPI_SUCCESS != (ret = opal_dstore.fetch(opal_dstore_internal, - &proc->super.proc_name, - OPAL_DSTORE_LOCALITY, &myvals))) { + /* get the locality information - all RTEs are required + * to provide this information at startup */ + OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_LOCALITY, &proc->super.proc_name, &u16ptr, OPAL_UINT16); + if (OPAL_SUCCESS != ret) { proc->super.proc_flags = OPAL_PROC_NON_LOCAL; } else { - kv = (opal_value_t*)opal_list_get_first(&myvals); - proc->super.proc_flags = kv->data.uint16; + proc->super.proc_flags = u16; } - OPAL_LIST_DESTRUCT(&myvals); if (ompi_process_info.num_procs < ompi_direct_modex_cutoff) { /* IF the number of procs falls below the specified cutoff, @@ -440,7 +423,7 @@ int ompi_proc_refresh(void) * ALL modex info for this proc) will have no appreciable * impact on launch scaling */ - OPAL_MODEX_RECV_VALUE(ret, OPAL_DSTORE_HOSTNAME, (opal_proc_t*)&proc->super, + OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_HOSTNAME, &proc->super.proc_name, (char**)&(proc->super.proc_hostname), OPAL_STRING); if (OMPI_SUCCESS != ret) { break; @@ -459,7 +442,7 @@ int ompi_proc_refresh(void) { /* get the remote architecture */ uint32_t* uiptr = &(proc->super.proc_arch); - OPAL_MODEX_RECV_VALUE(ret, OPAL_DSTORE_ARCH, (opal_proc_t*)&proc->super, + OPAL_MODEX_RECV_VALUE(ret, OPAL_PMIX_ARCH, &proc->super.proc_name, (void**)&uiptr, OPAL_UINT32); if (OMPI_SUCCESS != ret) { break; @@ -484,7 +467,6 @@ int ompi_proc_refresh(void) int 
ompi_proc_pack(ompi_proc_t **proclist, int proclistsize, - bool full_info, opal_buffer_t* buf) { int i, rc; @@ -510,65 +492,24 @@ ompi_proc_pack(ompi_proc_t **proclist, int proclistsize, OPAL_THREAD_UNLOCK(&ompi_proc_lock); return rc; } - if (full_info) { - int32_t num_entries; - opal_value_t *kv; - opal_list_t data; - - /* fetch all info we know about the peer - while - * the remote procs may already know some of it, we cannot - * be certain they do. So we must include a full dump of - * everything we know about this proc - */ - OBJ_CONSTRUCT(&data, opal_list_t); - rc = opal_dstore.fetch(opal_dstore_internal, - &proclist[i]->super.proc_name, - NULL, &data); - if (OPAL_SUCCESS != rc) { - OMPI_ERROR_LOG(rc); - num_entries = 0; - } else { - /* count the number of entries we will send */ - num_entries = opal_list_get_size(&data); - } - - /* put the number of entries into the buffer */ - rc = opal_dss.pack(buf, &num_entries, 1, OPAL_INT32); - if (OPAL_SUCCESS != rc) { - OMPI_ERROR_LOG(rc); - break; - } - - /* if there are entries, store them */ - while (NULL != (kv = (opal_value_t*)opal_list_remove_first(&data))) { - if (OPAL_SUCCESS != (rc = opal_dss.pack(buf, &kv, 1, OPAL_VALUE))) { - OMPI_ERROR_LOG(rc); - break; - } - OBJ_RELEASE(kv); - } - OBJ_DESTRUCT(&data); - - } else { - rc = opal_dss.pack(buf, &(proclist[i]->super.proc_arch), 1, OPAL_UINT32); - if(rc != OPAL_SUCCESS) { - OMPI_ERROR_LOG(rc); - OPAL_THREAD_UNLOCK(&ompi_proc_lock); - return rc; - } - rc = opal_dss.pack(buf, &(proclist[i]->super.proc_hostname), 1, OPAL_STRING); - if(rc != OPAL_SUCCESS) { - OMPI_ERROR_LOG(rc); - OPAL_THREAD_UNLOCK(&ompi_proc_lock); - return rc; - } + rc = opal_dss.pack(buf, &(proclist[i]->super.proc_arch), 1, OPAL_UINT32); + if(rc != OPAL_SUCCESS) { + OMPI_ERROR_LOG(rc); + OPAL_THREAD_UNLOCK(&ompi_proc_lock); + return rc; + } + rc = opal_dss.pack(buf, &(proclist[i]->super.proc_hostname), 1, OPAL_STRING); + if(rc != OPAL_SUCCESS) { + OMPI_ERROR_LOG(rc); + 
OPAL_THREAD_UNLOCK(&ompi_proc_lock); + return rc; } } OPAL_THREAD_UNLOCK(&ompi_proc_lock); return OMPI_SUCCESS; } -static ompi_proc_t * +ompi_proc_t * ompi_proc_find_and_add(const ompi_process_name_t * name, bool* isnew) { ompi_proc_t *proc, *rproc = NULL; @@ -610,14 +551,11 @@ ompi_proc_find_and_add(const ompi_process_name_t * name, bool* isnew) int ompi_proc_unpack(opal_buffer_t* buf, int proclistsize, ompi_proc_t ***proclist, - bool full_info, int *newproclistsize, ompi_proc_t ***newproclist) { int i; size_t newprocs_len = 0; ompi_proc_t **plist=NULL, **newprocs = NULL; - opal_list_t myvals; - opal_value_t *kv; /* do not free plist *ever*, since it is used in the remote group structure of a communicator */ @@ -635,7 +573,7 @@ ompi_proc_unpack(opal_buffer_t* buf, /* cycle through the array of provided procs and unpack * their info - as packed by ompi_proc_pack */ - for ( i=0; idata.uint32; - } else { - new_arch = opal_local_arch; - } - OPAL_LIST_DESTRUCT(&myvals); -#else - new_arch = opal_local_arch; -#endif - if (ompi_process_info.num_procs < ompi_direct_modex_cutoff) { - /* retrieve the hostname */ - OBJ_CONSTRUCT(&myvals, opal_list_t); - rc = opal_dstore.fetch(opal_dstore_internal, - &new_name, - OPAL_DSTORE_HOSTNAME, &myvals); - if( OPAL_SUCCESS == rc ) { - kv = (opal_value_t*)opal_list_get_first(&myvals); - new_hostname = strdup(kv->data.string); - } else { - new_hostname = NULL; - } - OPAL_LIST_DESTRUCT(&myvals); - } else { - /* just set the hostname to NULL for now - we'll fill it in - * as modex_recv's are called for procs we will talk to - */ - new_hostname = NULL; - } - } /* update all the values */ plist[i]->super.proc_arch = new_arch; /* if arch is different than mine, create a new convertor for this proc */ @@ -770,28 +638,6 @@ ompi_proc_unpack(opal_buffer_t* buf, /* Save the hostname */ plist[i]->super.proc_hostname = new_hostname; } - - } else { - if (full_info) { - int32_t num_recvd_entries; - int32_t j, cnt; - - /* discard all keys: they are 
already locally known */ - cnt = 1; - if (OPAL_SUCCESS == (rc = opal_dss.unpack(buf, &num_recvd_entries, &cnt, OPAL_INT32))) { - for (j = 0; j < num_recvd_entries; j++) { - opal_value_t *kv; - cnt = 1; - if (OPAL_SUCCESS != (rc = opal_dss.unpack(buf, &kv, &cnt, OPAL_VALUE))) { - OMPI_ERROR_LOG(rc); - continue; - } - OBJ_RELEASE(kv); - } - } else { - OMPI_ERROR_LOG(rc); - } - } } } diff --git a/ompi/proc/proc.h b/ompi/proc/proc.h index 649a222bd7..f178238365 100644 --- a/ompi/proc/proc.h +++ b/ompi/proc/proc.h @@ -219,6 +219,8 @@ static inline ompi_proc_t* ompi_proc_local(void) */ OMPI_DECLSPEC ompi_proc_t * ompi_proc_find ( const ompi_process_name_t* name ); +OMPI_DECLSPEC ompi_proc_t * ompi_proc_find_and_add(const ompi_process_name_t * name, bool* isnew); + /** * Pack proc list into portable buffer * @@ -237,8 +239,8 @@ OMPI_DECLSPEC ompi_proc_t * ompi_proc_find ( const ompi_process_name_t* name ); * @retval OMPI_SUCCESS Success * @retval OMPI_ERROR Unspecified error */ -OMPI_DECLSPEC int ompi_proc_pack(ompi_proc_t **proclist, int proclistsize, - bool full_info, +OMPI_DECLSPEC int ompi_proc_pack(ompi_proc_t **proclist, + int proclistsize, opal_buffer_t *buf); @@ -282,9 +284,10 @@ OMPI_DECLSPEC int ompi_proc_pack(ompi_proc_t **proclist, int proclistsize, * OMPI_ERROR else */ OMPI_DECLSPEC int ompi_proc_unpack(opal_buffer_t *buf, - int proclistsize, ompi_proc_t ***proclist, - bool full_info, - int *newproclistsize, ompi_proc_t ***newproclist); + int proclistsize, + ompi_proc_t ***proclist, + int *newproclistsize, + ompi_proc_t ***newproclist); /** * Refresh the OMPI process subsystem diff --git a/ompi/runtime/ompi_mpi_finalize.c b/ompi/runtime/ompi_mpi_finalize.c index a522121c44..1415995829 100644 --- a/ompi/runtime/ompi_mpi_finalize.c +++ b/ompi/runtime/ompi_mpi_finalize.c @@ -16,7 +16,7 @@ * Copyright (c) 2006 University of Houston. All rights reserved. * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. 
* Copyright (c) 2011 Sandia National Laboratories. All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. * * $COPYRIGHT$ * @@ -78,8 +78,7 @@ #include "ompi/mca/io/base/base.h" #include "ompi/mca/pml/base/pml_base_bsend.h" #include "ompi/runtime/params.h" -#include "ompi/mca/dpm/base/base.h" -#include "ompi/mca/pubsub/base/base.h" +#include "ompi/dpm/dpm.h" #include "ompi/mpiext/mpiext.h" #if OPAL_ENABLE_FT_CR == 1 @@ -343,13 +342,8 @@ int ompi_mpi_finalize(void) return ret; } - /* finalize the pubsub functions */ - if (OMPI_SUCCESS != (ret = mca_base_framework_close(&ompi_pubsub_base_framework) ) ) { - return ret; - } - - /* finalize the DPM framework */ - if ( OMPI_SUCCESS != (ret = mca_base_framework_close(&ompi_dpm_base_framework))) { + /* finalize the DPM subsystem */ + if ( OMPI_SUCCESS != (ret = ompi_dpm_finalize())) { return ret; } diff --git a/ompi/runtime/ompi_mpi_init.c b/ompi/runtime/ompi_mpi_init.c index 18cb4122d8..c9db272dd0 100644 --- a/ompi/runtime/ompi_mpi_init.c +++ b/ompi/runtime/ompi_mpi_init.c @@ -17,7 +17,7 @@ * Copyright (c) 2008-2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2011 Sandia National Laboratories. All rights reserved. * Copyright (c) 2012-2013 Inria. All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* @@ -89,8 +89,7 @@ #include "ompi/debuggers/debuggers.h" #include "ompi/proc/proc.h" #include "ompi/mca/pml/base/pml_base_bsend.h" -#include "ompi/mca/dpm/base/base.h" -#include "ompi/mca/pubsub/base/base.h" +#include "ompi/dpm/dpm.h" #include "ompi/mpiext/mpiext.h" #if OPAL_ENABLE_FT_CR == 1 @@ -298,6 +297,19 @@ _process_name_compare(const opal_process_name_t p1, const opal_process_name_t p2 return ompi_rte_compare_name_fields(OMPI_RTE_CMP_ALL, o1, o2); } +static int _convert_string_to_process_name(opal_process_name_t *name, + const char* name_string) +{ + return ompi_rte_convert_string_to_process_name(name, name_string); +} + +static int _convert_process_name_to_string(char** name_string, + const opal_process_name_t *name) +{ + return ompi_rte_convert_process_name_to_string(name_string, name); +} + + void ompi_mpi_thread_level(int requested, int *provided) { /** @@ -386,6 +398,8 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) /* Convince OPAL to use our naming scheme */ opal_process_name_print = _process_name_print_for_opal; opal_compare_proc = _process_name_compare; + opal_convert_string_to_process_name = _convert_string_to_process_name; + opal_convert_process_name_to_string = _convert_process_name_to_string; /* Register MCA variables */ if (OPAL_SUCCESS != (ret = ompi_register_mca_variables())) { @@ -492,7 +506,7 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) #if OMPI_ENABLE_THREAD_MULTIPLE /* add this bitflag to the modex */ - OPAL_MODEX_SEND_STRING(ret, PMIX_SYNC_REQD, PMIX_GLOBAL, + OPAL_MODEX_SEND_STRING(ret, OPAL_PMIX_GLOBAL, "MPI_THREAD_LEVEL", &threadlevel_bf, sizeof(uint8_t)); if (OPAL_SUCCESS != ret) { error = "ompi_mpi_init: modex send thread level"; @@ -608,9 +622,9 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) /* exchange connection info - this function may also act as a barrier * if data exchange is required. 
The modex occurs solely across procs * in our job, so no proc array is passed. If a barrier is required, - * the "fence" function will perform it internally + * the "modex" function will perform it internally */ - OPAL_FENCE(NULL, 0, NULL, NULL); + OPAL_MODEX(NULL, 1); OPAL_TIMING_MNEXT((&tm,"time from modex to first barrier")); @@ -792,23 +806,9 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) goto error; } - /* Setup the publish/subscribe (PUBSUB) framework */ - if (OMPI_SUCCESS != (ret = mca_base_framework_open(&ompi_pubsub_base_framework, 0))) { - error = "mca_pubsub_base_open() failed"; - goto error; - } - if (OMPI_SUCCESS != (ret = ompi_pubsub_base_select())) { - error = "ompi_pubsub_base_select() failed"; - goto error; - } - - /* Setup the dynamic process management (DPM) framework */ - if (OMPI_SUCCESS != (ret = mca_base_framework_open(&ompi_dpm_base_framework, 0))) { - error = "ompi_dpm_base_open() failed"; - goto error; - } - if (OMPI_SUCCESS != (ret = ompi_dpm_base_select())) { - error = "ompi_dpm_base_select() failed"; + /* Setup the dynamic process management (DPM) subsystem */ + if (OMPI_SUCCESS != (ret = ompi_dpm_init())) { + error = "ompi_dpm_init() failed"; goto error; } @@ -843,8 +843,8 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) /* Check whether we have been spawned or not. We introduce that at the very end, since we need collectives, datatypes, ptls etc. up and running here.... 
*/ - if (OMPI_SUCCESS != (ret = ompi_dpm.dyn_init())) { - error = "ompi_comm_dyn_init() failed"; + if (OMPI_SUCCESS != (ret = ompi_dpm_dyn_init())) { + error = "ompi_dpm_dyn_init() failed"; goto error; } diff --git a/opal/class/opal_hotel.h b/opal/class/opal_hotel.h index c08f44aebb..23aec8d3ca 100644 --- a/opal/class/opal_hotel.h +++ b/opal/class/opal_hotel.h @@ -290,9 +290,8 @@ static inline void opal_hotel_checkout_and_return_occupant(opal_hotel_t *hotel, hotel->unoccupied_rooms[hotel->last_unoccupied_room] = room_num; } else { - opal_output( 0, " OOPS there is no occupant in room_num %d", room_num); - } - + *occupant = NULL; + } } /** @@ -309,6 +308,33 @@ static inline bool opal_hotel_is_empty (opal_hotel_t *hotel) return false; } +/** + * Access the occupant of a room, but leave them checked into their room. + * + * @param hotel Pointer to hotel (IN) + * @param room Room number to checkout (IN) + * @param void * occupant (OUT) + * + * This accessor function is typically used to cycle across the occupants + * to check for someone already present that matches a description. + */ +static inline void opal_hotel_knock(opal_hotel_t *hotel, int room_num, void **occupant) +{ + opal_hotel_room_t *room; + + /* Bozo check */ + assert(room_num < hotel->num_rooms); + + *occupant = NULL; + + /* If there's an occupant in the room, have them come to the door */ + room = &(hotel->rooms[room_num]); + if (OPAL_LIKELY(NULL != room->occupant)) { + opal_output (10, "occupant %p in room num %d responded to knock", room->occupant, room_num); + *occupant = room->occupant; + } +} + END_C_DECLS #endif /* OPAL_HOTEL_H */ diff --git a/opal/dss/dss_load_unload.c b/opal/dss/dss_load_unload.c index bc761816d6..e5a9b5c8f4 100644 --- a/opal/dss/dss_load_unload.c +++ b/opal/dss/dss_load_unload.c @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2014 Intel, Inc. 
All rights reserved. + * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -50,8 +50,12 @@ int opal_dss_unload(opal_buffer_t *buffer, void **payload, } /* okay, we have something to provide - pass it back */ - *payload = buffer->base_ptr; - *bytes_used = buffer->bytes_used; + *bytes_used = buffer->bytes_used - (buffer->unpack_ptr - buffer->base_ptr); + if (0 == (*bytes_used)) { + *payload = NULL; + } else { + *payload = buffer->unpack_ptr; + } /* dereference everything in buffer */ buffer->base_ptr = NULL; diff --git a/opal/include/opal/constants.h b/opal/include/opal/constants.h index 75a5418e77..82c046946f 100644 --- a/opal/include/opal/constants.h +++ b/opal/include/opal/constants.h @@ -79,7 +79,8 @@ enum { OPAL_ERR_CONNECTION_FAILED = (OPAL_ERR_BASE - 49), OPAL_ERR_AUTHENTICATION_FAILED = (OPAL_ERR_BASE - 50), OPAL_ERR_COMM_FAILURE = (OPAL_ERR_BASE - 51), - OPAL_ERR_SERVER_NOT_AVAIL = (OPAL_ERR_BASE - 52) + OPAL_ERR_SERVER_NOT_AVAIL = (OPAL_ERR_BASE - 52), + OPAL_ERR_IN_PROCESS = (OPAL_ERR_BASE - 53) }; #define OPAL_ERR_MAX (OPAL_ERR_BASE - 100) diff --git a/opal/mca/btl/openib/btl_openib_component.c b/opal/mca/btl/openib/btl_openib_component.c index 0f4df1f29b..5150f7b4b8 100644 --- a/opal/mca/btl/openib/btl_openib_component.c +++ b/opal/mca/btl/openib/btl_openib_component.c @@ -18,7 +18,7 @@ * Copyright (c) 2009-2012 Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2011-2015 NVIDIA Corporation. All rights reserved. * Copyright (c) 2012 Oak Ridge National Laboratory. All rights reserved - * Copyright (c) 2013-2014 Intel, Inc. All rights reserved + * Copyright (c) 2013-2015 Intel, Inc. All rights reserved * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2014 Bull SAS. All rights reserved. 
@@ -451,7 +451,7 @@ static int btl_openib_modex_send(void) } /* All done -- send it! */ - OPAL_MODEX_SEND(rc, PMIX_SYNC_REQD, PMIX_GLOBAL, + OPAL_MODEX_SEND(rc, OPAL_PMIX_GLOBAL, &mca_btl_openib_component.super.btl_version, message, msg_size); free(message); diff --git a/opal/mca/btl/openib/btl_openib_proc.c b/opal/mca/btl/openib/btl_openib_proc.c index 2d622fec3b..27719bcdef 100644 --- a/opal/mca/btl/openib/btl_openib_proc.c +++ b/opal/mca/btl/openib/btl_openib_proc.c @@ -9,7 +9,7 @@ * University of Stuttgart. All rights reserved. * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. - * Copyright (c) 2007-2008 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2007-2015 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2006-2007 Voltaire All rights reserved. * Copyright (c) 2014 Intel, Inc. All rights reserved. * $COPYRIGHT$ @@ -148,7 +148,7 @@ mca_btl_openib_proc_t* mca_btl_openib_proc_create(opal_proc_t* proc) /* query for the peer address info */ OPAL_MODEX_RECV(rc, &mca_btl_openib_component.super.btl_version, - proc, &message, &msg_size); + &proc->proc_name, &message, &msg_size); if (OPAL_SUCCESS != rc) { BTL_VERBOSE(("[%s:%d] opal_modex_recv failed for peer %s", __FILE__, __LINE__, diff --git a/opal/mca/btl/portals4/btl_portals4_component.c b/opal/mca/btl/portals4/btl_portals4_component.c index 9c1cffa38d..94b4dd3023 100644 --- a/opal/mca/btl/portals4/btl_portals4_component.c +++ b/opal/mca/btl/portals4/btl_portals4_component.c @@ -11,7 +11,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2010-2012 Sandia National Laboratories. All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. * Copyright (c) 2014 Bull SAS. All rights reserved. * Copyright (c) 2015 Los Alamos National Security, LLC. All rights * reserved. 
@@ -422,7 +422,7 @@ static mca_btl_base_module_t** mca_btl_portals4_component_init(int *num_btls, ptl_process_ids[interface].rank, ptl_process_ids[interface].phys.nid, ptl_process_ids[interface].phys.pid)); } - OPAL_MODEX_SEND(ret, PMIX_SYNC_REQD, PMIX_GLOBAL, + OPAL_MODEX_SEND(ret, OPAL_PMIX_GLOBAL, &mca_btl_portals4_component.super.btl_version, ptl_process_ids, mca_btl_portals4_component.num_btls * sizeof(ptl_process_t)); if (OPAL_SUCCESS != ret) { diff --git a/opal/mca/btl/scif/btl_scif_component.c b/opal/mca/btl/scif/btl_scif_component.c index 36db880445..a92bf429ca 100644 --- a/opal/mca/btl/scif/btl_scif_component.c +++ b/opal/mca/btl/scif/btl_scif_component.c @@ -4,7 +4,7 @@ * reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -216,7 +216,7 @@ static int mca_btl_scif_modex_send (void) memset(&modex, 0, sizeof(mca_btl_scif_modex_t)); modex.port_id = mca_btl_scif_module.port_id; - OPAL_MODEX_SEND(rc, PMIX_SYNC_REQD, PMIX_LOCAL, + OPAL_MODEX_SEND(rc, OPAL_PMIX_LOCAL, &mca_btl_scif_component.super.btl_version, &modex, sizeof (modex)); return rc; diff --git a/opal/mca/btl/tcp/btl_tcp_component.c b/opal/mca/btl/tcp/btl_tcp_component.c index 9275edf98b..5b77708a17 100644 --- a/opal/mca/btl/tcp/btl_tcp_component.c +++ b/opal/mca/btl/tcp/btl_tcp_component.c @@ -16,7 +16,7 @@ * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2013-2015 NVIDIA Corporation. All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -917,8 +917,8 @@ static int mca_btl_tcp_component_exchange(void) } } /* end of for opal_ifbegin() */ } /* end of for tcp_num_btls */ - OPAL_MODEX_SEND(rc, PMIX_SYNC_REQD, PMIX_GLOBAL, - &mca_btl_tcp_component.super.btl_version, + OPAL_MODEX_SEND(rc, OPAL_PMIX_GLOBAL, + &mca_btl_tcp_component.super.btl_version, addrs, xfer_size); free(addrs); } /* end if */ diff --git a/opal/mca/btl/tcp/btl_tcp_proc.c b/opal/mca/btl/tcp/btl_tcp_proc.c index 5812d1f312..c86977dde3 100644 --- a/opal/mca/btl/tcp/btl_tcp_proc.c +++ b/opal/mca/btl/tcp/btl_tcp_proc.c @@ -11,10 +11,10 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2008-2010 Oracle and/or its affiliates. All rights reserved - * Copyright (c) 2013-2014 Intel, Inc. All rights reserved + * Copyright (c) 2013-2015 Intel, Inc. All rights reserved * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -133,7 +133,7 @@ mca_btl_tcp_proc_t* mca_btl_tcp_proc_create(opal_proc_t* proc) /* lookup tcp parameters exported by this proc */ OPAL_MODEX_RECV(rc, &mca_btl_tcp_component.super.btl_version, - proc, (uint8_t**)&btl_proc->proc_addrs, &size); + &proc->proc_name, (uint8_t**)&btl_proc->proc_addrs, &size); if(rc != OPAL_SUCCESS) { if(OPAL_ERR_NOT_FOUND != rc) BTL_ERROR(("opal_modex_recv: failed with return value=%d", rc)); diff --git a/opal/mca/btl/ugni/btl_ugni_add_procs.c b/opal/mca/btl/ugni/btl_ugni_add_procs.c index 37d6d9dcd3..fbeff5b5f0 100644 --- a/opal/mca/btl/ugni/btl_ugni_add_procs.c +++ b/opal/mca/btl/ugni/btl_ugni_add_procs.c @@ -3,7 +3,7 @@ * Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2011 UT-Battelle, LLC. All rights reserved. 
- * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -18,7 +18,7 @@ #include "btl_ugni_smsg.h" #include "opal/include/opal/align.h" -#include "opal/mca/dstore/dstore.h" +#include "opal/mca/pmix/pmix.h" #define INITIAL_GNI_EPS 10000 @@ -289,11 +289,10 @@ static int mca_btl_ugni_setup_mpools (mca_btl_ugni_module_t *ugni_module) { struct mca_mpool_base_resources_t mpool_resources; - unsigned int mbox_increment, nprocs; + unsigned int mbox_increment; + uint32_t nprocs, *u32; const char *mpool_name; int rc; - opal_list_t vals; - opal_value_t *kv; rc = opal_pointer_array_init (&ugni_module->pending_smsg_frags_bb, 0, 1 << 30, 32768); @@ -302,16 +301,13 @@ mca_btl_ugni_setup_mpools (mca_btl_ugni_module_t *ugni_module) } /* determine how many procs are in the job (might want to check universe size here) */ - OBJ_CONSTRUCT(&vals, opal_list_t); - if (OPAL_SUCCESS == opal_dstore.fetch(opal_dstore_internal, &OPAL_PROC_MY_NAME, - OPAL_DSTORE_UNIV_SIZE, &vals)) { - /* the number of procs in the job is in the uint32 field */ - kv = (opal_value_t*)opal_list_get_first(&vals); - nprocs = kv->data.uint32; - } else { + u32 = &nprocs; + OPAL_MODEX_RECV_VALUE(rc, OPAL_PMIX_UNIV_SIZE, &OPAL_PROC_MY_NAME, + &u32, OPAL_UINT32); + if (OPAL_SUCCESS != rc) { + /* take a wild conservative guess */ nprocs = 512; } - OPAL_LIST_DESTRUCT(&vals); rc = mca_btl_ugni_smsg_setup (nprocs); if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) { diff --git a/opal/mca/btl/usnic/btl_usnic_compat.c b/opal/mca/btl/usnic/btl_usnic_compat.c index 997e0432d8..932985175c 100644 --- a/opal/mca/btl/usnic/btl_usnic_compat.c +++ b/opal/mca/btl/usnic/btl_usnic_compat.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2014-2015 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2015 Intel, Inc. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -41,7 +42,7 @@ void usnic_compat_modex_send(int *rc, opal_btl_usnic_modex_t *modexes, size_t size) { - OPAL_MODEX_SEND(*rc, PMIX_SYNC_REQD, PMIX_REMOTE, component, + OPAL_MODEX_SEND(*rc, OPAL_PMIX_REMOTE, component, modexes, size); } @@ -770,25 +771,6 @@ opal_btl_usnic_put(struct mca_btl_base_module_t *base_module, sfrag->sf_size = size; sfrag->sf_ack_bytes_left = size; - - - /* JMS NOTE: This is currently broken, and is deactivated by - removing the MCA_BTL_FLAGS_PUT from .btl_flags in btl_module.c. - - Overwriting the uf_local_seg values is not a good idea, and - doesn't do anything to actually send the data in the - progression past finish_put_or_send(). - - The proper fix is to change the plumbing here to eventually - call fi_sendv() with an iov[0] = the internal buffer that's - already allocated, and iov[1] = the user's buffer. The usnic - provider in fi_sendv() will be smart enough to figure out which - is more performance: memcpy'ing the 2 buffers together and - doing a single xfer down to the hardware, or actually doing a - SG list down to the hardware. */ - - - opal_btl_usnic_frag_t *frag; frag = &sfrag->sf_base; frag->uf_local_seg[0].seg_len = size; diff --git a/opal/mca/btl/vader/btl_vader_component.c b/opal/mca/btl/vader/btl_vader_component.c index 2289991e33..79f12aa70b 100644 --- a/opal/mca/btl/vader/btl_vader_component.c +++ b/opal/mca/btl/vader/btl_vader_component.c @@ -15,7 +15,7 @@ * Copyright (c) 2010-2015 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2011 NVIDIA Corporation. All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -327,7 +327,7 @@ static int mca_btl_base_vader_modex_send (void) } #endif - OPAL_MODEX_SEND(rc, PMIX_SYNC_REQD, PMIX_LOCAL, + OPAL_MODEX_SEND(rc, OPAL_PMIX_LOCAL, &mca_btl_vader_component.super.btl_version, &modex, modex_size); return rc; diff --git a/opal/mca/btl/vader/btl_vader_module.c b/opal/mca/btl/vader/btl_vader_module.c index deec2ed4f4..0f3dfddf5c 100644 --- a/opal/mca/btl/vader/btl_vader_module.c +++ b/opal/mca/btl/vader/btl_vader_module.c @@ -14,7 +14,7 @@ * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2010-2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -162,7 +162,8 @@ static int init_vader_endpoint (struct mca_btl_base_endpoint_t *ep, struct opal_ ep->peer_smp_rank = remote_rank; if (remote_rank != MCA_BTL_VADER_LOCAL_RANK) { - OPAL_MODEX_RECV(rc, &component->super.btl_version, proc, (void **) &modex, &msg_size); + OPAL_MODEX_RECV(rc, &component->super.btl_version, + &proc->proc_name, (void **) &modex, &msg_size); if (OPAL_SUCCESS != rc) { return rc; } diff --git a/opal/mca/common/ugni/common_ugni.c b/opal/mca/common/ugni/common_ugni.c index 4cd86993c2..971868f360 100644 --- a/opal/mca/common/ugni/common_ugni.c +++ b/opal/mca/common/ugni/common_ugni.c @@ -3,7 +3,7 @@ * Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2011 UT-Battelle, LLC. All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -193,7 +193,7 @@ static int opal_common_ugni_send_modex (int my_cdm_id) * new ranks started on the same nodes as the spawnee ranks, etc. */ - OPAL_MODEX_SEND(rc, PMIX_ASYNC_RDY, PMIX_GLOBAL, + OPAL_MODEX_SEND(rc, OPAL_PMIX_GLOBAL, &opal_common_ugni_component, modex_msg, total_msg_size); diff --git a/opal/mca/common/ugni/common_ugni_ep.c b/opal/mca/common/ugni/common_ugni_ep.c index e1b6446020..dadf39ac52 100644 --- a/opal/mca/common/ugni/common_ugni_ep.c +++ b/opal/mca/common/ugni/common_ugni_ep.c @@ -3,7 +3,7 @@ * Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2011 UT-Battelle, LLC. All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -34,7 +34,7 @@ int opal_common_ugni_endpoint_for_proc (opal_common_ugni_device_t *dev, opal_pro /* Receive the modex */ OPAL_MODEX_RECV(rc, &opal_common_ugni_component, - peer_proc, (void **)&modex, &msg_size); + &peer_proc->proc_name, (void **)&modex, &msg_size); if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) { OPAL_OUTPUT((-1, "btl/ugni error receiving modex")); return rc; diff --git a/opal/mca/dstore/Makefile.am b/opal/mca/dstore/Makefile.am deleted file mode 100644 index 93eb59d5f4..0000000000 --- a/opal/mca/dstore/Makefile.am +++ /dev/null @@ -1,32 +0,0 @@ -# -# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. 
-# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -AM_CPPFLAGS = $(LTDLINCL) - -# main library setup -noinst_LTLIBRARIES = libmca_dstore.la -libmca_dstore_la_SOURCES = - -# pkgdata setup -dist_opaldata_DATA = - -# local files -headers = dstore.h dstore_types.h -libmca_dstore_la_SOURCES += $(headers) - -# Conditionally install the header files -if WANT_INSTALL_HEADERS -opaldir = $(opalincludedir)/$(subdir) -nobase_opal_HEADERS = $(headers) -endif - -include base/Makefile.am - -distclean-local: - rm -f base/static-components.h diff --git a/opal/mca/dstore/base/Makefile.am b/opal/mca/dstore/base/Makefile.am deleted file mode 100644 index 9ed5ae65ac..0000000000 --- a/opal/mca/dstore/base/Makefile.am +++ /dev/null @@ -1,20 +0,0 @@ -# -# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. -# Copyright (c) 2014 Intel, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -dist_opaldata_DATA += base/help-dstore-base.txt - -headers += \ - base/base.h - -libmca_dstore_la_SOURCES += \ - base/dstore_base_frame.c \ - base/dstore_base_select.c \ - base/dstore_base_stubs.c diff --git a/opal/mca/dstore/base/base.h b/opal/mca/dstore/base/base.h deleted file mode 100644 index dd8df381ec..0000000000 --- a/opal/mca/dstore/base/base.h +++ /dev/null @@ -1,131 +0,0 @@ -/* - * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. - * Copyright (c) 2013-2014 Intel, Inc. All rights reserved. - * Copyright (c) 2014 Research Organization for Information Science - * and Technology (RIST). All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - */ - -#ifndef MCA_DSTORE_BASE_H -#define MCA_DSTORE_BASE_H - -#include "opal_config.h" -#include "opal/types.h" - -#include "opal/mca/mca.h" -#include "opal/mca/base/mca_base_framework.h" -#include "opal/mca/event/event.h" -#include "opal/class/opal_hash_table.h" -#include "opal/class/opal_list.h" -#include "opal/class/opal_pointer_array.h" -#include "opal/dss/dss.h" -#include "opal/util/proc.h" - -#include "opal/mca/dstore/dstore.h" - -BEGIN_C_DECLS - -OPAL_DECLSPEC extern mca_base_framework_t opal_dstore_base_framework; - -/** - * Select a dstore module - */ -OPAL_DECLSPEC int opal_dstore_base_select(void); - -/* DSTORE is an oddball framework in that it: - * - * has an active storage component that issues handle-specific - * modules. This is done to provide separate storage areas that - * are isolated from each other, and thus don't have to worry - * about overlapping keys - * - * a backfill module used to attempt to retrieve data that has - * been requested, but that the handle-specific storage module - * does not contain. This is used in situations where data has - * not been provided at startup, and we need to retrieve it - * solely on-demand - */ -typedef struct { - opal_dstore_base_component_t *storage_component; - opal_dstore_base_module_t *backfill_module; - opal_pointer_array_t handles; // array of open datastore handles - opal_list_t available_components; -} opal_dstore_base_t; - -OPAL_DECLSPEC extern opal_dstore_base_t opal_dstore_base; - -typedef struct { - opal_object_t super; - char *name; - opal_dstore_base_module_t *module; - opal_dstore_base_component_t *storage_component; -} opal_dstore_handle_t; -OBJ_CLASS_DECLARATION(opal_dstore_handle_t); - -/** - * Data for a particular opal process - * The name association is maintained in the - * proc_data hash table. 
- */ -typedef struct { - /** Structure can be put on lists (including in hash tables) */ - opal_list_item_t super; - bool loaded; - /* List of opal_value_t structures containing all data - received from this process, sorted by key. */ - opal_list_t data; -} opal_dstore_proc_data_t; -OBJ_CLASS_DECLARATION(opal_dstore_proc_data_t); - -/** - * Attribute structure to update tracker object - * (used in dstore sm component) - */ -typedef struct { - opal_list_item_t super; - uint32_t jobid; - char *connection_info; -} opal_dstore_attr_t; -OBJ_CLASS_DECLARATION(opal_dstore_attr_t); - -typedef struct { - int32_t seg_index; - uint32_t offset; - int32_t data_size; -} meta_info; - -#define META_OFFSET 65536 - -OPAL_DECLSPEC int opal_dstore_base_open(const char *name, char* desired_components, opal_list_t *attrs); -OPAL_DECLSPEC int opal_dstore_base_update(int dstorehandle, opal_list_t *attrs); -OPAL_DECLSPEC int opal_dstore_base_close(int dstorehandle); -OPAL_DECLSPEC int opal_dstore_base_store(int dstorehandle, - const opal_process_name_t *id, - opal_value_t *kv); -OPAL_DECLSPEC int opal_dstore_base_fetch(int dstorehandle, - const opal_process_name_t *id, - const char *key, - opal_list_t *kvs); -OPAL_DECLSPEC int opal_dstore_base_remove_data(int dstorehandle, - const opal_process_name_t *id, - const char *key); -OPAL_DECLSPEC int opal_dstore_base_get_handle(int dstorehandle, void **dhdl); - -/* support */ -OPAL_DECLSPEC opal_dstore_proc_data_t* opal_dstore_base_lookup_proc(opal_proc_table_t *jtable, - opal_process_name_t id, bool create); - -OPAL_DECLSPEC opal_value_t* opal_dstore_base_lookup_keyval(opal_dstore_proc_data_t *proc_data, - const char *key); - - -END_C_DECLS - -#endif diff --git a/opal/mca/dstore/base/dstore_base_frame.c b/opal/mca/dstore/base/dstore_base_frame.c deleted file mode 100644 index 8c75303ebd..0000000000 --- a/opal/mca/dstore/base/dstore_base_frame.c +++ /dev/null @@ -1,150 +0,0 @@ -/* - * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. 
- * Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. - * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. - * Copyright (c) 2014-2015 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - - -#include "opal_config.h" -#include "opal/constants.h" - -#include "opal/mca/mca.h" -#include "opal/util/output.h" -#include "opal/mca/base/base.h" -#include "opal/dss/dss_types.h" - -#include "opal/mca/dstore/base/base.h" - - -/* - * The following file was created by configure. It contains extern - * dstorements and the definition of an array of pointers to each - * module's public mca_base_module_t struct. - */ - -#include "opal/mca/dstore/base/static-components.h" - -opal_dstore_base_API_t opal_dstore = { - opal_dstore_base_open, - opal_dstore_base_update, - opal_dstore_base_close, - opal_dstore_base_store, - opal_dstore_base_fetch, - opal_dstore_base_remove_data, - opal_dstore_base_get_handle -}; -opal_dstore_base_t opal_dstore_base = {0}; - -int opal_dstore_internal = -1; - -static int opal_dstore_base_frame_close(void) -{ - opal_dstore_handle_t *hdl; - opal_list_item_t *item; - int i; - - /* cycle across all the active dstore handles and let them cleanup - order - * doesn't matter in this case - */ - for (i=0; i < opal_dstore_base.handles.size; i++) { - if (NULL != (hdl = (opal_dstore_handle_t*)opal_pointer_array_get_item(&opal_dstore_base.handles, i))) { - OBJ_RELEASE(hdl); - } - } - OBJ_DESTRUCT(&opal_dstore_base.handles); - - for (item = opal_list_remove_first(&opal_dstore_base.available_components); - NULL != item; - item = opal_list_remove_first(&opal_dstore_base.available_components)) { - OBJ_RELEASE(item); - } - OBJ_DESTRUCT(&opal_dstore_base.available_components); - - /* let the backfill module finalize, should it wish to do so */ - if (NULL != opal_dstore_base.backfill_module && NULL != 
opal_dstore_base.backfill_module->finalize) { - opal_dstore_base.backfill_module->finalize((struct opal_dstore_base_module_t*)opal_dstore_base.backfill_module); - } - - return mca_base_framework_components_close(&opal_dstore_base_framework, NULL); -} - -static int opal_dstore_base_frame_open(mca_base_open_flag_t flags) -{ - OBJ_CONSTRUCT(&opal_dstore_base.handles, opal_pointer_array_t); - opal_pointer_array_init(&opal_dstore_base.handles, 5, INT_MAX, 1); - - OBJ_CONSTRUCT(&opal_dstore_base.available_components, opal_list_t); - - /* Open up all available components */ - return mca_base_framework_components_open(&opal_dstore_base_framework, flags); -} - -MCA_BASE_FRAMEWORK_DECLARE(opal, dstore, NULL, NULL, - opal_dstore_base_frame_open, - opal_dstore_base_frame_close, - mca_dstore_base_static_components, 0); - -/*** CLASS INSTANCES ***/ -static void hdl_con(opal_dstore_handle_t *p) -{ - p->name = NULL; - p->module = NULL; - p->storage_component = NULL; -} -static void hdl_des(opal_dstore_handle_t *p) -{ - opal_dstore_base_module_t *mod; - - if (NULL != p->name) { - free(p->name); - } - if (NULL != p->module) { - mod = (opal_dstore_base_module_t*)p->module; - if (NULL != mod->finalize) { - mod->finalize((struct opal_dstore_base_module_t*)mod); - } - free(mod); - } -} -OBJ_CLASS_INSTANCE(opal_dstore_handle_t, - opal_object_t, - hdl_con, hdl_des); - -static void proc_data_construct(opal_dstore_proc_data_t *ptr) -{ - ptr->loaded = false; - OBJ_CONSTRUCT(&ptr->data, opal_list_t); -} - -static void proc_data_destruct(opal_dstore_proc_data_t *ptr) -{ - OPAL_LIST_DESTRUCT(&ptr->data); -} -OBJ_CLASS_INSTANCE(opal_dstore_proc_data_t, - opal_list_item_t, - proc_data_construct, - proc_data_destruct); - -static void attr_construct(opal_dstore_attr_t *attr) -{ - attr->connection_info = NULL; -} - -static void attr_destruct(opal_dstore_attr_t *attr) -{ - if (NULL != attr->connection_info) { - free(attr->connection_info); - } -} -OBJ_CLASS_INSTANCE(opal_dstore_attr_t, - 
opal_list_item_t, - attr_construct, attr_destruct); - - diff --git a/opal/mca/dstore/base/dstore_base_select.c b/opal/mca/dstore/base/dstore_base_select.c deleted file mode 100644 index babcd4ce06..0000000000 --- a/opal/mca/dstore/base/dstore_base_select.c +++ /dev/null @@ -1,105 +0,0 @@ -/* - * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. - * Copyright (c) 2013-2014 Intel, Inc. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "opal_config.h" -#include "opal/constants.h" - -#include "opal/class/opal_list.h" -#include "opal/mca/mca.h" -#include "opal/mca/base/base.h" -#include "opal/mca/base/mca_base_component_repository.h" -#include "opal/util/output.h" - -#include "opal/mca/dstore/base/base.h" - -static bool selected = false; - -int -opal_dstore_base_select(void) -{ - mca_base_component_list_item_t *cli, *copy_cli; - mca_base_component_t *cmp; - mca_base_module_t *md; - int priority, cmp_pri, mod_pri; - opal_dstore_base_module_t *mod=NULL; - opal_dstore_base_component_t *comp=NULL; - - if (selected) { - /* ensure we don't do this twice */ - return OPAL_SUCCESS; - } - selected = true; - - /* Query all available components and ask if they have a module */ - cmp_pri = -100000; - mod_pri = -100000; - OPAL_LIST_FOREACH(cli, &opal_dstore_base_framework.framework_components, mca_base_component_list_item_t) { - cmp = (mca_base_component_t*)cli->cli_component; - - opal_output_verbose(5, opal_dstore_base_framework.framework_output, - "mca:dstore:select: checking available component %s", - cmp->mca_component_name); - - /* If there's no query function, skip it */ - if (NULL == cmp->mca_query_component) { - opal_output_verbose(5, opal_dstore_base_framework.framework_output, - "mca:dstore:select: Skipping component [%s]. 
It does not implement a query function", - cmp->mca_component_name ); - continue; - } - - /* Query the component */ - opal_output_verbose(5, opal_dstore_base_framework.framework_output, - "mca:dstore:select: Querying component [%s]", - cmp->mca_component_name); - - /* If the component reports failure, then skip component - however, - * it is okay to return a NULL module */ - if (OPAL_SUCCESS != cmp->mca_query_component(&md, &priority)) { - opal_output_verbose(5, opal_dstore_base_framework.framework_output, - "mca:dstore:select: Skipping component [%s] - not available", - cmp->mca_component_name ); - continue; - } - - copy_cli = OBJ_NEW(mca_base_component_list_item_t); - if (NULL != copy_cli) { - copy_cli->cli_component = cmp; - opal_list_append(&opal_dstore_base.available_components, (opal_list_item_t *)copy_cli); - } - /* track the highest priority component that returned a NULL module - this - * will become our storage element */ - if (NULL == md) { - if (0 < priority && priority > cmp_pri) { - comp = (opal_dstore_base_component_t*)cmp; - cmp_pri = priority; - } - } else { - /* track the highest priority module that was returned - this - * will become our backfill element */ - if (priority > mod_pri) { - mod = (opal_dstore_base_module_t*)md; - mod_pri = priority; - } - } - } - - if (NULL == comp) { - /* no components available - that's bad */ - return OPAL_ERROR; - } - opal_dstore_base.storage_component = comp; - - /* it's okay not to have a backfill module */ - opal_dstore_base.backfill_module = mod; - - return OPAL_SUCCESS;; -} diff --git a/opal/mca/dstore/base/dstore_base_stubs.c b/opal/mca/dstore/base/dstore_base_stubs.c deleted file mode 100644 index 8c6d26528d..0000000000 --- a/opal/mca/dstore/base/dstore_base_stubs.c +++ /dev/null @@ -1,276 +0,0 @@ -/* - * Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. - * Copyright (c) 2013-2015 Intel Inc. All rights reserved - * Copyright (c) 2014 Mellanox Technologies, Inc. 
- * All rights reserved. - * Copyright (c) 2014 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - - -#include "opal_config.h" -#include "opal/constants.h" -#include "opal_stdint.h" - -#include "opal/mca/mca.h" -#include "opal/util/argv.h" -#include "opal/util/error.h" -#include "opal/util/output.h" -#include "opal/mca/base/base.h" -#include "opal/dss/dss_types.h" - -#include "opal/mca/dstore/base/base.h" - - -int opal_dstore_base_open(const char *name, char* desired_components, opal_list_t *attrs) -{ - opal_dstore_handle_t *hdl; - int index; - opal_dstore_base_module_t *mod; - int i; - mca_base_component_list_item_t* cli; - char** tokens; - - if (NULL != desired_components) { - tokens = opal_argv_split(desired_components, ','); - for (i = 0; NULL != tokens[i]; i++) { - OPAL_LIST_FOREACH(cli, &opal_dstore_base.available_components, mca_base_component_list_item_t) { - if (0 == strncmp(tokens[i], cli->cli_component->mca_component_name, strlen(tokens[i]))) { - if (NULL != ((opal_dstore_base_component_t*)cli->cli_component)->create_handle && NULL != (mod = ((opal_dstore_base_component_t*)cli->cli_component)->create_handle(attrs))) { - /* have our module, so create a new dstore_handle */ - hdl = OBJ_NEW(opal_dstore_handle_t); - if (NULL != name) { - hdl->name = strdup(name); - } - hdl->module = mod; - hdl->storage_component = (opal_dstore_base_component_t*)cli->cli_component; - if (0 > (index = opal_pointer_array_add(&opal_dstore_base.handles, hdl))) { - OPAL_ERROR_LOG(index); - OBJ_RELEASE(hdl); - } - opal_argv_free(tokens); - opal_output_verbose(1, opal_dstore_base_framework.framework_output, - "Created handle for %s dstore to component %s", - (NULL == hdl->name) ? 
"NULL" : hdl->name, - cli->cli_component->mca_component_name); - return index; - } - } - } - } - opal_argv_free(tokens); - } else { - OPAL_LIST_FOREACH(cli, &opal_dstore_base.available_components, mca_base_component_list_item_t) { - if (NULL != ((opal_dstore_base_component_t*)cli->cli_component)->create_handle && NULL != (mod = ((opal_dstore_base_component_t*)cli->cli_component)->create_handle(attrs))) { - /* have our module, so create a new dstore_handle */ - hdl = OBJ_NEW(opal_dstore_handle_t); - if (NULL != name) { - hdl->name = strdup(name); - } - hdl->module = mod; - hdl->storage_component = (opal_dstore_base_component_t*)cli->cli_component; - if (0 > (index = opal_pointer_array_add(&opal_dstore_base.handles, hdl))) { - OPAL_ERROR_LOG(index); - OBJ_RELEASE(hdl); - } - opal_output_verbose(1, opal_dstore_base_framework.framework_output, - "Created handle for %s dstore to component %s", - (NULL == hdl->name) ? "NULL" : hdl->name, - cli->cli_component->mca_component_name); - return index; - } - } - } - - /* if we get here, then we were unable to create a module - * for this scope - */ - return OPAL_ERROR; -} - -int opal_dstore_base_update(int dstorehandle, opal_list_t *attrs) -{ - int rc; - opal_dstore_handle_t *hdl; - - if (dstorehandle < 0) { - return OPAL_ERR_NOT_INITIALIZED; - } - - if (NULL == (hdl = (opal_dstore_handle_t*)opal_pointer_array_get_item(&opal_dstore_base.handles, dstorehandle))) { - OPAL_ERROR_LOG(OPAL_ERR_NOT_FOUND); - return OPAL_ERR_NOT_FOUND; - } - - if (NULL == hdl->storage_component->update_handle) { - return OPAL_SUCCESS; - } - - if (OPAL_SUCCESS != (rc = hdl->storage_component->update_handle(dstorehandle, attrs))) { - OPAL_ERROR_LOG(rc); - } - - return rc; -} - -int opal_dstore_base_close(int dstorehandle) -{ - opal_dstore_handle_t *hdl; - int i; - - /* if the handle is -1, then close all handles */ - if (dstorehandle < 0) { - for (i=0; i < opal_dstore_base.handles.size; i++) { - if (NULL != (hdl = 
(opal_dstore_handle_t*)opal_pointer_array_get_item(&opal_dstore_base.handles, i))) { - OBJ_RELEASE(hdl); - opal_pointer_array_set_item(&opal_dstore_base.handles, i, NULL); - } - } - return OPAL_SUCCESS; - } - - /* get the datastore handle */ - if (NULL == (hdl = (opal_dstore_handle_t*)opal_pointer_array_get_item(&opal_dstore_base.handles, dstorehandle))) { - return OPAL_ERR_NOT_FOUND; - } - opal_pointer_array_set_item(&opal_dstore_base.handles, dstorehandle, NULL); - /* release the handle - this will also finalize and free the module */ - OBJ_RELEASE(hdl); - - return OPAL_SUCCESS; -} - - -int opal_dstore_base_store(int dstorehandle, - const opal_process_name_t *id, - opal_value_t *kv) -{ - opal_dstore_handle_t *hdl; - - if (dstorehandle < 0) { - return OPAL_ERR_NOT_INITIALIZED; - } - - if (NULL == (hdl = (opal_dstore_handle_t*)opal_pointer_array_get_item(&opal_dstore_base.handles, dstorehandle))) { - OPAL_ERROR_LOG(OPAL_ERR_NOT_FOUND); - return OPAL_ERR_NOT_FOUND; - } - - opal_output_verbose(1, opal_dstore_base_framework.framework_output, - "storing data in %s dstore", (NULL == hdl->name) ? "NULL" : hdl->name); - - return hdl->module->store((struct opal_dstore_base_module_t*)hdl->module, id, kv); -} - -int opal_dstore_base_fetch(int dstorehandle, - const opal_process_name_t *id, - const char *key, - opal_list_t *kvs) -{ - opal_dstore_handle_t *hdl; - int rc; - - if (dstorehandle < 0) { - return OPAL_ERR_NOT_INITIALIZED; - } - - if (NULL == (hdl = (opal_dstore_handle_t*)opal_pointer_array_get_item(&opal_dstore_base.handles, dstorehandle))) { - OPAL_ERROR_LOG(OPAL_ERR_NOT_FOUND); - return OPAL_ERR_NOT_FOUND; - } - - opal_output_verbose(1, opal_dstore_base_framework.framework_output, - "fetching data from %s dstore", (NULL == hdl->name) ? 
"NULL" : hdl->name); - - if (OPAL_SUCCESS == (rc = hdl->module->fetch((struct opal_dstore_base_module_t*)hdl->module, id, key, kvs))) { - /* found the data, so we can just return it */ - return rc; - } - - /* if the storage module didn't find it, then let the backfill module try - * to retrieve it if we have one */ - if (NULL != opal_dstore_base.backfill_module) { - rc = opal_dstore_base.backfill_module->fetch((struct opal_dstore_base_module_t*)opal_dstore_base.backfill_module, id, key, kvs); - } - return rc; -} - -int opal_dstore_base_remove_data(int dstorehandle, - const opal_process_name_t *id, - const char *key) -{ - opal_dstore_handle_t *hdl; - - if (dstorehandle < 0) { - return OPAL_ERR_NOT_INITIALIZED; - } - - if (NULL == (hdl = (opal_dstore_handle_t*)opal_pointer_array_get_item(&opal_dstore_base.handles, dstorehandle))) { - OPAL_ERROR_LOG(OPAL_ERR_NOT_FOUND); - return OPAL_ERR_NOT_FOUND; - } - - opal_output_verbose(1, opal_dstore_base_framework.framework_output, - "removing data from %s dstore", (NULL == hdl->name) ? "NULL" : hdl->name); - - return hdl->module->remove((struct opal_dstore_base_module_t*)hdl->module, id, key); -} - -int opal_dstore_base_get_handle(int dstorehandle, void **dhdl) -{ - opal_dstore_handle_t *hdl; - - if (NULL == (hdl = (opal_dstore_handle_t*)opal_pointer_array_get_item(&opal_dstore_base.handles, dstorehandle))) { - OPAL_ERROR_LOG(OPAL_ERR_NOT_FOUND); - return OPAL_ERR_NOT_FOUND; - } - - *dhdl = (void*)hdl; - return OPAL_SUCCESS; -} - -/** - * Find data for a given key in a given proc_data_t - * container. - */ -opal_value_t* opal_dstore_base_lookup_keyval(opal_dstore_proc_data_t *proc_data, - const char *key) -{ - opal_value_t *kv; - - OPAL_LIST_FOREACH(kv, &proc_data->data, opal_value_t) { - if (0 == strcmp(key, kv->key)) { - return kv; - } - } - return NULL; -} - - -/** - * Find proc_data_t container associated with given - * opal_process_name_t. 
- */ -opal_dstore_proc_data_t* opal_dstore_base_lookup_proc(opal_proc_table_t *ptable, - opal_process_name_t id, bool create) -{ - opal_dstore_proc_data_t *proc_data = NULL; - - opal_proc_table_get_value(ptable, id, (void**)&proc_data); - if (NULL == proc_data && create) { - proc_data = OBJ_NEW(opal_dstore_proc_data_t); - if (NULL == proc_data) { - opal_output(0, "dstore:hash:lookup_opal_proc: unable to allocate proc_data_t\n"); - return NULL; - } - opal_proc_table_set_value(ptable, id, proc_data); - } - - return proc_data; -} - diff --git a/opal/mca/dstore/base/help-dstore-base.txt b/opal/mca/dstore/base/help-dstore-base.txt deleted file mode 100644 index 5d7f3d8769..0000000000 --- a/opal/mca/dstore/base/help-dstore-base.txt +++ /dev/null @@ -1,19 +0,0 @@ - -*- text -*- -# -# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. -# -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# -# This is the US/English general help file for OPAL Errmgr HNP module. -# -[errmgr-hnp:unknown-job-error] -An error has occurred in an unknown job. This generally should not happen -except due to an internal OPAL error. - -Job state: %s - -This information should probably be repopald to the OMPI developers. diff --git a/opal/mca/dstore/base/owner.txt b/opal/mca/dstore/base/owner.txt deleted file mode 100644 index e6150b6b0f..0000000000 --- a/opal/mca/dstore/base/owner.txt +++ /dev/null @@ -1,7 +0,0 @@ -# -# owner/status file -# owner: institution that is responsible for this package -# status: e.g. active, maintenance, unmaintained -# -owner: project -status: active diff --git a/opal/mca/dstore/dstore.h b/opal/mca/dstore/dstore.h deleted file mode 100644 index 45629faa10..0000000000 --- a/opal/mca/dstore/dstore.h +++ /dev/null @@ -1,222 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2012-2014 Los Alamos National Security, Inc. 
All rights - * reserved. - * Copyright (c) 2013-2014 Intel, Inc. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - * The Database Framework - used for internal storage of - * information relating to modex and other OMPI operations - * - */ - -#ifndef OPAL_DB_H -#define OPAL_DB_H - -#include "opal_config.h" -#include "opal/types.h" - -#include "opal/mca/mca.h" -#include "opal/mca/event/event.h" -#include "opal/dss/dss_types.h" -#include "opal/util/proc.h" - -#include "opal/mca/dstore/dstore_types.h" - -/** - * DATABASE DESIGN - * - * Each API function is treated as blocking. - * - */ - -BEGIN_C_DECLS - -/* declare a global handle until such time - * as someone figures out how to separate the various - * datastore channels - */ -OPAL_DECLSPEC extern int opal_dstore_peer; -OPAL_DECLSPEC extern int opal_dstore_internal; -OPAL_DECLSPEC extern int opal_dstore_nonpeer; - -/**** DEFINE THE PUBLIC API'S ****/ -/* - * Open a database - * - * Open a database for access. The name field is purely for - * debug purposes and has no implementation relevance. - * Just like the standard POSIX file open, the call will return - * a unique "handle" that must be provided with any subsequent - * call to store or fetch data from this database. - * - * The attributes parameter can be used to pass any desired - * optional directives to the active storage component. These - * are passed as a list of opal_value_t's. - * - * NOTE: calls to these APIs must be thread-protected as there - * is NO internal thread safety. 
- */ -typedef int (*opal_dstore_base_API_open_fn_t)(const char *name, char* desired_components, - opal_list_t *attributes); - -/* - * Update an existing handle - * - * Sometimes an existing handle requires an update to its attributes, so - * provide an API for doing so - */ -typedef int (*opal_dstore_base_API_update_fn_t)(int dstorehandle, - opal_list_t *attributes); - -/* - * Close a database handle - * - * Close the specified database handle. A -1 handle indicates - * that ALL open database handles are to be closed. - */ -typedef int (*opal_dstore_base_API_close_fn_t)(int dstorehandle); - -/* - * Store a data value against the primary key - overwrites any data - * of matching key that is already present. The data is copied into the database - * and therefore does not need to be preserved by the caller. - */ -typedef int (*opal_dstore_base_API_store_fn_t)(int dstorehandle, - const opal_process_name_t *id, - opal_value_t *kv); - -/* - * Retrieve data - * - * Retrieve data for the given primary key associated with the specified key. Wildcards - * are supported here as well. Caller is responsible for releasing the returned list - * of opal_value_t objects. - */ -typedef int (*opal_dstore_base_API_fetch_fn_t)(int dstorehandle, - const opal_process_name_t *id, - const char *key, - opal_list_t *kvs); - -/* - * Delete data - * - * Delete the data for the given primary key that is associated with the specified key. - * If a NULL key is provided, all data for the given primary key will be deleted. - */ -typedef int (*opal_dstore_base_API_remove_fn_t)(int dstorehandle, - const opal_process_name_t *id, - const char *key); - - -/* - * Get active dstore handle - * Get dstore handle asocciated with the passed id. 
- */ -typedef int (*opal_dstore_base_API_get_handle_fn_t)(int dstorehandle, void **dhdl); - - -/* - * the standard public API data structure - */ -typedef struct { - opal_dstore_base_API_open_fn_t open; - opal_dstore_base_API_update_fn_t update; - opal_dstore_base_API_close_fn_t close; - opal_dstore_base_API_store_fn_t store; - opal_dstore_base_API_fetch_fn_t fetch; - opal_dstore_base_API_remove_fn_t remove; - opal_dstore_base_API_get_handle_fn_t get_handle; -} opal_dstore_base_API_t; - - - -/**** DEFINE THE MODULE API'S ****/ -/* Note that each datastore handle will be associated with - * a single active module. Thus, storing and fetching data - * from that module does not require that we pass in the - * handle itself. - * - * NOTE: the call to actually store/fetch data in a given - * datastore handle must be protected against threaded operations - * as there is NO thread protection inside the various modules. - */ -struct opal_dstore_base_module_t; - -/* - * Initialize the module - */ -typedef int (*opal_dstore_base_module_init_fn_t)(struct opal_dstore_base_module_t *mod); - -/* - * Finalize the module - */ -typedef void (*opal_dstore_base_module_finalize_fn_t)(struct opal_dstore_base_module_t *mod); - -/* store the data in this module */ -typedef int (*opal_dstore_base_module_store_fn_t)(struct opal_dstore_base_module_t *mod, - const opal_process_name_t *id, - opal_value_t *kv); - -/* fetch data from the module */ -typedef int (*opal_dstore_base_module_fetch_fn_t)(struct opal_dstore_base_module_t *mod, - const opal_process_name_t *id, - const char *key, - opal_list_t *kvs); - -/* remove data */ -typedef int (*opal_dstore_base_module_remove_fn_t)(struct opal_dstore_base_module_t *mod, - const opal_process_name_t *id, - const char *key); - -/* - * the standard module data structure - */ -typedef struct { - opal_dstore_base_module_init_fn_t init; - opal_dstore_base_module_finalize_fn_t finalize; - opal_dstore_base_module_store_fn_t store; - 
opal_dstore_base_module_fetch_fn_t fetch; - opal_dstore_base_module_remove_fn_t remove; -} opal_dstore_base_module_t; - -/* - * the component data structure - */ - -/* create and return a datastore module */ -typedef opal_dstore_base_module_t* (*mca_dstore_base_component_create_hdl_fn_t)(opal_list_t *attributes); - -/* update an existing handle */ -typedef int (*mca_dstore_base_component_update_hdl_fn_t)(int hdl, opal_list_t *attributes); - -/* provide a chance for the component to finalize */ -typedef void (*mca_dstore_base_component_finalize_fn_t)(void); - -typedef struct { - mca_base_component_t base_version; - mca_base_component_data_t base_data; - mca_dstore_base_component_create_hdl_fn_t create_handle; - mca_dstore_base_component_update_hdl_fn_t update_handle; - mca_dstore_base_component_finalize_fn_t finalize; -} opal_dstore_base_component_t; - -/* - * Macro for use in components that are of type dstore - */ -#define OPAL_DSTORE_BASE_VERSION_2_0_0 \ - OPAL_MCA_BASE_VERSION_2_1_0("dstore", 2, 0, 0) - -/* Global structure for accessing store functions */ -OPAL_DECLSPEC extern opal_dstore_base_API_t opal_dstore; /* holds base function pointers */ - -END_C_DECLS - -#endif diff --git a/opal/mca/dstore/dstore_types.h b/opal/mca/dstore/dstore_types.h deleted file mode 100644 index e8e660f725..0000000000 --- a/opal/mca/dstore/dstore_types.h +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - * The OPAL Database Framework - * - */ - -#ifndef OPAL_DSTORE_TYPES_H -#define OPAL_DSTORE_TYPES_H - -#include "opal_config.h" -#include "opal/types.h" - -#include "opal/dss/dss_types.h" -#include "opal/mca/pmix/pmix.h" - -BEGIN_C_DECLS - -/* some values are provided by an external entity such - * as the resource manager. 
These values enter the - * system via the PMIx interface at startup, but are - * not explicitly retrieved by processes. Instead, procs - * access them after RTE-init has stored them. For ease-of-use, - * we define equivalent dstore names here. PMIx attributes - * not listed here should be directly accessed via the - * OPAL pmix framework */ -#define OPAL_DSTORE_CPUSET PMIX_CPUSET -#define OPAL_DSTORE_CREDENTIAL PMIX_CREDENTIAL -#define OPAL_DSTORE_TMPDIR PMIX_TMPDIR -#define OPAL_DSTORE_JOBID PMIX_JOBID -#define OPAL_DSTORE_APPNUM PMIX_APPNUM -#define OPAL_DSTORE_RANK PMIX_RANK -#define OPAL_DSTORE_GLOBAL_RANK PMIX_GLOBAL_RANK -#define OPAL_DSTORE_LOCALRANK PMIX_LOCAL_RANK -#define OPAL_DSTORE_NODERANK PMIX_NODE_RANK -#define OPAL_DSTORE_LOCALLDR PMIX_LOCALLDR -#define OPAL_DSTORE_APPLDR PMIX_APPLDR -#define OPAL_DSTORE_LOCAL_PEERS PMIX_LOCAL_PEERS -#define OPAL_DSTORE_UNIV_SIZE PMIX_UNIV_SIZE -#define OPAL_DSTORE_JOB_SIZE PMIX_JOB_SIZE -#define OPAL_DSTORE_LOCAL_SIZE PMIX_LOCAL_SIZE -#define OPAL_DSTORE_NODE_SIZE PMIX_NODE_SIZE -#define OPAL_DSTORE_MAX_PROCS PMIX_MAX_PROCS -#define OPAL_DSTORE_NPROC_OFFSET PMIX_NPROC_OFFSET -#define OPAL_DSTORE_HOSTNAME PMIX_HOSTNAME -#define OPAL_DSTORE_NODEID PMIX_NODE_ID - -/* some OPAL-appropriate key definitions */ -#define OPAL_DSTORE_LOCALITY "opal.locality" // (uint16_t) relative locality of a peer -/* proc-specific scratch dirs */ -#define OPAL_DSTORE_JOB_SDIR "opal.job.session.dir" // (char*) job-level session dir -#define OPAL_DSTORE_MY_SDIR "opal.my.session.dir" // (char*) session dir for this proc -#define OPAL_DSTORE_URI "opal.uri" // (char*) uri of specified proc -#define OPAL_DSTORE_ARCH "opal.arch" // (uint32_t) arch for specified proc - -END_C_DECLS - -#endif diff --git a/opal/mca/dstore/hash/Makefile.am b/opal/mca/dstore/hash/Makefile.am deleted file mode 100644 index 670de4a3e3..0000000000 --- a/opal/mca/dstore/hash/Makefile.am +++ /dev/null @@ -1,36 +0,0 @@ -# -# Copyright (c) 2010 Cisco Systems, Inc. 
All rights reserved. -# Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -sources = \ - dstore_hash.h \ - dstore_hash_component.c \ - dstore_hash.c - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). - -if MCA_BUILD_opal_dstore_hash_DSO -component_noinst = -component_install = mca_dstore_hash.la -else -component_noinst = libmca_dstore_hash.la -component_install = -endif - -mcacomponentdir = $(opallibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_dstore_hash_la_SOURCES = $(sources) -mca_dstore_hash_la_LDFLAGS = -module -avoid-version -mca_dstore_hash_la_LIBADD = $(dstore_hash_LIBS) - -noinst_LTLIBRARIES = $(component_noinst) -libmca_dstore_hash_la_SOURCES =$(sources) -libmca_dstore_hash_la_LDFLAGS = -module -avoid-version diff --git a/opal/mca/dstore/hash/dstore_hash.h b/opal/mca/dstore/hash/dstore_hash.h deleted file mode 100644 index 78b65a05bb..0000000000 --- a/opal/mca/dstore/hash/dstore_hash.h +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. - * Copyright (c) 2014 Research Organization for Information Science - * and Technology (RIST). All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef OPAL_DSTORE_HASH_H -#define OPAL_DSTORE_HASH_H - -#include "opal/class/opal_hash_table.h" -#include "opal/mca/dstore/dstore.h" - -BEGIN_C_DECLS - - -OPAL_MODULE_DECLSPEC extern opal_dstore_base_component_t mca_dstore_hash_component; - -typedef struct { - opal_dstore_base_module_t api; - opal_proc_table_t ptable; -} mca_dstore_hash_module_t; -OPAL_MODULE_DECLSPEC extern mca_dstore_hash_module_t opal_dstore_hash_module; - -END_C_DECLS - -#endif /* OPAL_DSTORE_HASH_H */ diff --git a/opal/mca/dstore/hash/dstore_hash_component.c b/opal/mca/dstore/hash/dstore_hash_component.c deleted file mode 100644 index 7e69755522..0000000000 --- a/opal/mca/dstore/hash/dstore_hash_component.c +++ /dev/null @@ -1,84 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. - * Copyright (c) 2013-2014 Intel, Inc. All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - * These symbols are in a file by themselves to provide nice linker - * semantics. Since linkers generally pull in symbols by object - * files, keeping these symbols as the only symbols in this file - * prevents utility programs such as "ompi_info" from having to import - * entire components just to query their version and parameters. 
- */ - -#include "opal_config.h" -#include "opal/constants.h" - -#include "opal/mca/base/base.h" -#include "opal/util/error.h" - -#include "opal/mca/dstore/dstore.h" -#include "opal/mca/dstore/base/base.h" -#include "dstore_hash.h" - -static opal_dstore_base_module_t *component_create(opal_list_t *attrs); -static int dstore_hash_query(mca_base_module_t **module, int *priority); - -/* - * Instantiate the public struct with all of our public information - * and pointers to our public functions in it - */ -opal_dstore_base_component_t mca_dstore_hash_component = { - .base_version = { - OPAL_DSTORE_BASE_VERSION_2_0_0, - - /* Component name and version */ - .mca_component_name = "hash", - MCA_BASE_MAKE_VERSION(component, OPAL_MAJOR_VERSION, OPAL_MINOR_VERSION, - OPAL_RELEASE_VERSION), - - /* Component open and close functions */ - .mca_query_component = dstore_hash_query, - }, - .base_data = { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, - .create_handle = component_create, -}; - -static int dstore_hash_query(mca_base_module_t **module, int *priority) -{ - /* we are always available, but only as storage */ - *priority = 80; - *module = NULL; - return OPAL_SUCCESS; -} - -/* this component ignores any input attributes */ -static opal_dstore_base_module_t *component_create(opal_list_t *attrs) -{ - mca_dstore_hash_module_t *mod; - - mod = (mca_dstore_hash_module_t*)malloc(sizeof(mca_dstore_hash_module_t)); - if (NULL == mod) { - OPAL_ERROR_LOG(OPAL_ERR_OUT_OF_RESOURCE); - return NULL; - } - /* copy the APIs across */ - memcpy(mod, &opal_dstore_hash_module.api, sizeof(opal_dstore_base_module_t)); - /* let the module init itself */ - if (OPAL_SUCCESS != mod->api.init((struct opal_dstore_base_module_t*)mod)) { - /* release the module and return the error */ - free(mod); - return NULL; - } - return (opal_dstore_base_module_t*)mod; -} diff --git a/opal/mca/dstore/hash/owner.txt b/opal/mca/dstore/hash/owner.txt deleted file mode 100644 index 
e6150b6b0f..0000000000 --- a/opal/mca/dstore/hash/owner.txt +++ /dev/null @@ -1,7 +0,0 @@ -# -# owner/status file -# owner: institution that is responsible for this package -# status: e.g. active, maintenance, unmaintained -# -owner: project -status: active diff --git a/opal/mca/pmix/Makefile.am b/opal/mca/pmix/Makefile.am index 7829965414..0d71f702cd 100644 --- a/opal/mca/pmix/Makefile.am +++ b/opal/mca/pmix/Makefile.am @@ -17,7 +17,7 @@ libmca_pmix_la_SOURCES = dist_opaldata_DATA = # local files -headers = pmix.h +headers = pmix.h pmix_types.h pmix_server.h libmca_pmix_la_SOURCES += $(headers) # Conditionally install the header files diff --git a/opal/mca/pmix/base/Makefile.am b/opal/mca/pmix/base/Makefile.am index c419c72e4a..3af899e2b0 100644 --- a/opal/mca/pmix/base/Makefile.am +++ b/opal/mca/pmix/base/Makefile.am @@ -1,7 +1,7 @@ # # Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. # Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. -# Copyright (c) 2014 Intel, Inc. All rights reserved. +# Copyright (c) 2014-2015 Intel, Inc. All rights reserved. 
# $COPYRIGHT$ # # Additional copyrights may follow @@ -13,9 +13,11 @@ dist_opaldata_DATA += base/help-pmix-base.txt headers += \ base/base.h \ - base/pmix_base_fns.h + base/pmix_base_fns.h \ + base/pmix_base_hash.h libmca_pmix_la_SOURCES += \ base/pmix_base_frame.c \ base/pmix_base_select.c \ - base/pmix_base_fns.c + base/pmix_base_fns.c \ + base/pmix_base_hash.c diff --git a/opal/mca/pmix/base/base.h b/opal/mca/pmix/base/base.h index 3cb5bcd83c..90441a7960 100644 --- a/opal/mca/pmix/base/base.h +++ b/opal/mca/pmix/base/base.h @@ -17,6 +17,8 @@ #include "opal/mca/mca.h" #include "opal/mca/base/mca_base_framework.h" + +#include "opal/mca/pmix/pmix_types.h" #include "opal/mca/pmix/pmix.h" BEGIN_C_DECLS @@ -30,6 +32,12 @@ OPAL_DECLSPEC int opal_pmix_base_select(void); OPAL_DECLSPEC extern bool opal_pmix_base_allow_delayed_server; +OPAL_DECLSPEC void opal_pmix_base_register_handler(opal_pmix_errhandler_fn_t err); +OPAL_DECLSPEC void opal_pmix_base_deregister_handler(void); +OPAL_DECLSPEC void opal_pmix_base_errhandler(int status, + opal_list_t *procs, + opal_list_t *info); + END_C_DECLS #endif diff --git a/opal/mca/pmix/base/pmix_base_fns.c b/opal/mca/pmix/base/pmix_base_fns.c index 36f18b5f2e..ad35455205 100644 --- a/opal/mca/pmix/base/pmix_base_fns.c +++ b/opal/mca/pmix/base/pmix_base_fns.c @@ -31,9 +31,31 @@ #include "opal/mca/pmix/base/base.h" #include "opal/mca/pmix/base/pmix_base_fns.h" +#include "opal/mca/pmix/base/pmix_base_hash.h" #define OPAL_PMI_PAD 10 +static opal_pmix_errhandler_fn_t errhandler = NULL; + +void opal_pmix_base_register_handler(opal_pmix_errhandler_fn_t err) +{ + errhandler = err; +} + +void opal_pmix_base_errhandler(int status, + opal_list_t *procs, + opal_list_t *info) +{ + if (NULL != errhandler) { + errhandler(status); + } +} + +void opal_pmix_base_deregister_handler(void) +{ + errhandler = NULL; +} + static char* setup_key(const opal_process_name_t* name, const char *key, int pmix_keylen_max); static char *pmi_encode(const void *val, 
size_t vallen); static uint8_t *pmi_decode (const char *data, size_t *retlen); @@ -361,7 +383,7 @@ int opal_pmix_base_cache_keys_locally(const opal_process_name_t* id, const char* /* first try to fetch data from data storage */ OBJ_CONSTRUCT(&values, opal_list_t); - rc = opal_dstore.fetch(opal_dstore_internal, id, key, &values); + rc = opal_pmix_base_fetch(id, key, &values); if (OPAL_SUCCESS == rc) { kv = (opal_value_t*)opal_list_get_first(&values); /* create the copy */ @@ -456,10 +478,9 @@ int opal_pmix_base_cache_keys_locally(const opal_process_name_t* id, const char* return OPAL_ERROR; } /* store data in local hash table */ - if (OPAL_SUCCESS != (rc = opal_dstore.store(opal_dstore_internal, id, kv))) { + if (OPAL_SUCCESS != (rc = opal_pmix_base_store(id, kv))) { OPAL_ERROR_LOG(rc); } - /* keep going and cache everything locally */ offset = (size_t) (tmp3 - tmp_val) + size; if (0 == strcmp(kv->key, key)) { diff --git a/opal/mca/pmix/base/pmix_base_frame.c b/opal/mca/pmix/base/pmix_base_frame.c index 234909a22e..cc64466dcd 100644 --- a/opal/mca/pmix/base/pmix_base_frame.c +++ b/opal/mca/pmix/base/pmix_base_frame.c @@ -31,8 +31,9 @@ /* Note that this initializer is important -- do not remove it! See https://github.com/open-mpi/ompi/issues/375 for details. 
*/ opal_pmix_base_module_t opal_pmix = { 0 }; -bool opal_pmix_use_collective = false; +bool opal_pmix_collect_all_data = false; bool opal_pmix_base_allow_delayed_server = false; +int pmix_verbose_output = -1; static int opal_pmix_base_frame_register(mca_base_register_flag_t flags) { @@ -57,6 +58,8 @@ static int opal_pmix_base_frame_open(mca_base_open_flag_t flags) rc = mca_base_framework_components_open(&opal_pmix_base_framework, flags); /* ensure the function pointers are NULL */ memset(&opal_pmix, 0, sizeof(opal_pmix)); + /* pass across the verbosity */ + pmix_verbose_output = opal_pmix_base_framework.framework_output; return rc; } @@ -66,7 +69,80 @@ MCA_BASE_FRAMEWORK_DECLARE(opal, pmix, "OPAL PMI Client Framework", opal_pmix_base_frame_close, mca_pmix_base_static_components, 0); -OBJ_CLASS_INSTANCE(pmix_info_t, +/**** PMIX FRAMEWORK OBJECTS ****/ +static void icon(opal_pmix_info_t *i) +{ + i->key = NULL; + OBJ_CONSTRUCT(&i->value, opal_value_t); +} +static void ides(opal_pmix_info_t *i) +{ + if (NULL != i->key) { + free(i->key); + } + OBJ_DESTRUCT(&i->value); +} +OBJ_CLASS_INSTANCE(opal_pmix_info_t, opal_list_item_t, - NULL, NULL); + icon, ides); +static void lkcon(opal_pmix_pdata_t *p) +{ + p->proc.jobid = OPAL_JOBID_INVALID; + p->proc.vpid = OPAL_VPID_INVALID; + p->key = NULL; + OBJ_CONSTRUCT(&p->value, opal_value_t); +} +static void lkdes(opal_pmix_pdata_t *p) +{ + if (NULL != p->key) { + free(p->key); + } + OBJ_DESTRUCT(&p->value); +} +OBJ_CLASS_INSTANCE(opal_pmix_pdata_t, + opal_list_item_t, + lkcon, lkdes); + +static void mdcon(opal_pmix_modex_data_t *p) +{ + p->proc.jobid = OPAL_JOBID_INVALID; + p->proc.vpid = OPAL_VPID_INVALID; + p->blob = NULL; + p->size = 0; +} +static void mddes(opal_pmix_modex_data_t *p) +{ + if (NULL != p->blob) { + free(p->blob); + } +} +OBJ_CLASS_INSTANCE(opal_pmix_modex_data_t, + opal_list_item_t, + mdcon, mddes); + +static void apcon(opal_pmix_app_t *p) +{ + p->cmd = NULL; + p->argc = 0; + p->argv = NULL; + p->env = NULL; + 
p->maxprocs = 0; + OBJ_CONSTRUCT(&p->info, opal_list_t); +} +static void apdes(opal_pmix_app_t *p) +{ + if (NULL != p->cmd) { + free(p->cmd); + } + if (NULL != p->argv) { + opal_argv_free(p->argv); + } + if (NULL != p->env) { + opal_argv_free(p->env); + } + OPAL_LIST_DESTRUCT(&p->info); +} +OBJ_CLASS_INSTANCE(opal_pmix_app_t, + opal_list_item_t, + apcon, apdes); diff --git a/opal/mca/dstore/hash/dstore_hash.c b/opal/mca/pmix/base/pmix_base_hash.c similarity index 63% rename from opal/mca/dstore/hash/dstore_hash.c rename to opal/mca/pmix/base/pmix_base_hash.c index ba623fbd6a..c062ce0eaa 100644 --- a/opal/mca/dstore/hash/dstore_hash.c +++ b/opal/mca/pmix/base/pmix_base_hash.c @@ -6,7 +6,7 @@ * reserved. * Copyright (c) 2011-2014 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
* $COPYRIGHT$ @@ -32,61 +32,102 @@ #include "opal/util/proc.h" #include "opal/util/show_help.h" -#include "opal/mca/dstore/base/base.h" -#include "dstore_hash.h" +#include "opal/mca/pmix/base/base.h" +#include "opal/mca/pmix/base/pmix_base_hash.h" -static int init(struct opal_dstore_base_module_t *imod); -static void finalize(struct opal_dstore_base_module_t *imod); -static int store(struct opal_dstore_base_module_t *imod, - const opal_process_name_t *proc, - opal_value_t *val); -static int fetch(struct opal_dstore_base_module_t *imod, - const opal_process_name_t *proc, - const char *key, - opal_list_t *kvs); -static int remove_data(struct opal_dstore_base_module_t *imod, - const opal_process_name_t *proc, const char *key); - -mca_dstore_hash_module_t opal_dstore_hash_module = { - { - init, - finalize, - store, - fetch, - remove_data - } -}; - -/* Initialize our hash table */ -static int init(struct opal_dstore_base_module_t *imod) +/** + * Data for a particular opal process + * The name association is maintained in the + * proc_data hash table. + */ +typedef struct { + /** Structure can be put on lists (including in hash tables) */ + opal_list_item_t super; + bool loaded; + /* List of opal_value_t structures containing all data + received from this process, sorted by key. 
*/ + opal_list_t data; +} opal_pmix_proc_data_t; +static void proc_data_construct(opal_pmix_proc_data_t *ptr) { - mca_dstore_hash_module_t *mod; - - mod = (mca_dstore_hash_module_t*)imod; - OBJ_CONSTRUCT(&mod->ptable, opal_proc_table_t); - opal_proc_table_init(&mod->ptable, 16, 256); - return OPAL_SUCCESS; + ptr->loaded = false; + OBJ_CONSTRUCT(&ptr->data, opal_list_t); } -static void finalize(struct opal_dstore_base_module_t *imod) +static void proc_data_destruct(opal_pmix_proc_data_t *ptr) { - opal_dstore_proc_data_t *proc_data; + OPAL_LIST_DESTRUCT(&ptr->data); +} +OBJ_CLASS_INSTANCE(opal_pmix_proc_data_t, + opal_list_item_t, + proc_data_construct, + proc_data_destruct); + +/** + * Find data for a given key in a given proc_data_t + * container. + */ +static opal_value_t* lookup_keyval(opal_pmix_proc_data_t *proc_data, + const char *key) +{ + opal_value_t *kv; + + OPAL_LIST_FOREACH(kv, &proc_data->data, opal_value_t) { + if (0 == strcmp(key, kv->key)) { + return kv; + } + } + return NULL; +} + +/** + * Find proc_data_t container associated with given + * opal_process_name_t. 
+ */ +static opal_pmix_proc_data_t* lookup_proc(opal_proc_table_t *ptable, + opal_process_name_t id, bool create) +{ + opal_pmix_proc_data_t *proc_data = NULL; + + opal_proc_table_get_value(ptable, id, (void**)&proc_data); + if (NULL == proc_data && create) { + proc_data = OBJ_NEW(opal_pmix_proc_data_t); + if (NULL == proc_data) { + opal_output(0, "pmix:hash:lookup_proc: unable to allocate proc_data_t\n"); + return NULL; + } + opal_proc_table_set_value(ptable, id, proc_data); + } + + return proc_data; +} + + +static opal_proc_table_t ptable; + +/* Initialize our hash table */ +void opal_pmix_base_hash_init(void) +{ + OBJ_CONSTRUCT(&ptable, opal_proc_table_t); + opal_proc_table_init(&ptable, 16, 256); +} + +void opal_pmix_base_hash_finalize(void) +{ + opal_pmix_proc_data_t *proc_data; opal_process_name_t key; void *node1, *node2; - mca_dstore_hash_module_t *mod; - - mod = (mca_dstore_hash_module_t*)imod; /* to assist in getting a clean valgrind, cycle thru the hash table * and release all data stored in it */ - if (OPAL_SUCCESS == opal_proc_table_get_first_key(&mod->ptable, &key, + if (OPAL_SUCCESS == opal_proc_table_get_first_key(&ptable, &key, (void**)&proc_data, &node1, &node2)) { if (NULL != proc_data) { OBJ_RELEASE(proc_data); } - while (OPAL_SUCCESS == opal_proc_table_get_next_key(&mod->ptable, &key, + while (OPAL_SUCCESS == opal_proc_table_get_next_key(&ptable, &key, (void**)&proc_data, node1, &node1, node2, &node2)) { @@ -95,30 +136,26 @@ static void finalize(struct opal_dstore_base_module_t *imod) } } } - OBJ_DESTRUCT(&mod->ptable); + OBJ_DESTRUCT(&ptable); } -static int store(struct opal_dstore_base_module_t *imod, - const opal_process_name_t *id, - opal_value_t *val) +int opal_pmix_base_store(const opal_process_name_t *id, + opal_value_t *val) { - opal_dstore_proc_data_t *proc_data; + opal_pmix_proc_data_t *proc_data; opal_value_t *kv; - mca_dstore_hash_module_t *mod; int rc; - mod = (mca_dstore_hash_module_t*)imod; - - opal_output_verbose(1, 
opal_dstore_base_framework.framework_output, + opal_output_verbose(1, opal_pmix_base_framework.framework_output, "%s dstore:hash:store storing data for proc %s", OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), OPAL_NAME_PRINT(*id)); /* lookup the proc data object for this proc */ - if (NULL == (proc_data = opal_dstore_base_lookup_proc(&mod->ptable, *id, true))) { + if (NULL == (proc_data = lookup_proc(&ptable, *id, true))) { /* unrecoverable error */ - OPAL_OUTPUT_VERBOSE((5, opal_dstore_base_framework.framework_output, + OPAL_OUTPUT_VERBOSE((5, opal_pmix_base_framework.framework_output, "%s dstore:hash:store: storing data for proc %s unrecoverably failed", OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), OPAL_NAME_PRINT(*id))); return OPAL_ERR_OUT_OF_RESOURCE; @@ -127,10 +164,10 @@ static int store(struct opal_dstore_base_module_t *imod, /* see if we already have this key in the data - means we are updating * a pre-existing value */ - kv = opal_dstore_base_lookup_keyval(proc_data, val->key); + kv = lookup_keyval(proc_data, val->key); #if OPAL_ENABLE_DEBUG char *_data_type = opal_dss.lookup_data_type(val->type); - OPAL_OUTPUT_VERBOSE((5, opal_dstore_base_framework.framework_output, + OPAL_OUTPUT_VERBOSE((5, opal_pmix_base_framework.framework_output, "%s dstore:hash:store: %s key %s[%s] for proc %s", OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), (NULL == kv ? 
"storing" : "updating"), @@ -152,25 +189,21 @@ static int store(struct opal_dstore_base_module_t *imod, return OPAL_SUCCESS; } -static int fetch(struct opal_dstore_base_module_t *imod, - const opal_process_name_t *id, - const char *key, opal_list_t *kvs) +int opal_pmix_base_fetch(const opal_process_name_t *id, + const char *key, opal_list_t *kvs) { - opal_dstore_proc_data_t *proc_data; + opal_pmix_proc_data_t *proc_data; opal_value_t *kv, *knew; - mca_dstore_hash_module_t *mod; int rc; - mod = (mca_dstore_hash_module_t*)imod; - - OPAL_OUTPUT_VERBOSE((5, opal_dstore_base_framework.framework_output, + OPAL_OUTPUT_VERBOSE((5, opal_pmix_base_framework.framework_output, "%s dstore:hash:fetch: searching for key %s on proc %s", OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), (NULL == key) ? "NULL" : key, OPAL_NAME_PRINT(*id))); /* lookup the proc data object for this proc */ - if (NULL == (proc_data = opal_dstore_base_lookup_proc(&mod->ptable, *id, true))) { - OPAL_OUTPUT_VERBOSE((5, opal_dstore_base_framework.framework_output, + if (NULL == (proc_data = lookup_proc(&ptable, *id, true))) { + OPAL_OUTPUT_VERBOSE((5, opal_pmix_base_framework.framework_output, "%s dstore_hash:fetch data for proc %s not found", OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), OPAL_NAME_PRINT(*id))); @@ -190,7 +223,7 @@ static int fetch(struct opal_dstore_base_module_t *imod, OPAL_ERROR_LOG(rc); return rc; } - OPAL_OUTPUT_VERBOSE((5, opal_dstore_base_framework.framework_output, + OPAL_OUTPUT_VERBOSE((5, opal_pmix_base_framework.framework_output, "%s dstore:hash:fetch: adding data for key %s on proc %s", OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), (NULL == kv->key) ? 
"NULL" : kv->key, @@ -203,8 +236,8 @@ static int fetch(struct opal_dstore_base_module_t *imod, } /* find the value */ - if (NULL == (kv = opal_dstore_base_lookup_keyval(proc_data, key))) { - OPAL_OUTPUT_VERBOSE((5, opal_dstore_base_framework.framework_output, + if (NULL == (kv = lookup_keyval(proc_data, key))) { + OPAL_OUTPUT_VERBOSE((5, opal_pmix_base_framework.framework_output, "%s dstore_hash:fetch key %s for proc %s not found", OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), (NULL == key) ? "NULL" : key, @@ -229,17 +262,13 @@ static int fetch(struct opal_dstore_base_module_t *imod, return OPAL_SUCCESS; } -static int remove_data(struct opal_dstore_base_module_t *imod, - const opal_process_name_t *id, const char *key) +int opal_pmix_base_remove(const opal_process_name_t *id, const char *key) { - opal_dstore_proc_data_t *proc_data; + opal_pmix_proc_data_t *proc_data; opal_value_t *kv; - mca_dstore_hash_module_t *mod; - - mod = (mca_dstore_hash_module_t*)imod; /* lookup the specified proc */ - if (NULL == (proc_data = opal_dstore_base_lookup_proc(&mod->ptable, *id, false))) { + if (NULL == (proc_data = lookup_proc(&ptable, *id, false))) { /* no data for this proc */ return OPAL_SUCCESS; } @@ -250,7 +279,7 @@ static int remove_data(struct opal_dstore_base_module_t *imod, OBJ_RELEASE(kv); } /* remove the proc_data object itself from the jtable */ - opal_proc_table_remove_value(&mod->ptable, *id); + opal_proc_table_remove_value(&ptable, *id); /* cleanup */ OBJ_RELEASE(proc_data); return OPAL_SUCCESS; diff --git a/opal/mca/pmix/base/pmix_base_hash.h b/opal/mca/pmix/base/pmix_base_hash.h new file mode 100644 index 0000000000..5ab3e0ffa0 --- /dev/null +++ b/opal/mca/pmix/base/pmix_base_hash.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved. + * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. 
+ * Copyright (c) 2014 Research Organization for Information Science + * and Technology (RIST). All rights reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef OPAL_PMIX_HASH_H +#define OPAL_PMIX_HASH_H + +#include "opal/class/opal_list.h" +#include "opal/class/opal_hash_table.h" +#include "opal/dss/dss.h" +#include "opal/util/proc.h" + +BEGIN_C_DECLS + +OPAL_DECLSPEC void opal_pmix_base_hash_init(void); +OPAL_DECLSPEC void opal_pmix_base_hash_finalize(void); + +OPAL_DECLSPEC int opal_pmix_base_store(const opal_process_name_t *id, + opal_value_t *val); + +OPAL_DECLSPEC int opal_pmix_base_fetch(const opal_process_name_t *id, + const char *key, opal_list_t *kvs); + +OPAL_DECLSPEC int opal_pmix_base_remove(const opal_process_name_t *id, const char *key); + +END_C_DECLS + +#endif /* OPAL_PMIX_HASH_H */ diff --git a/opal/mca/pmix/base/pmix_base_select.c b/opal/mca/pmix/base/pmix_base_select.c index feed10a28e..fe49f56004 100644 --- a/opal/mca/pmix/base/pmix_base_select.c +++ b/opal/mca/pmix/base/pmix_base_select.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
* $COPYRIGHT$ * * Additional copyrights may follow @@ -22,7 +22,6 @@ int opal_pmix_base_select(void) { - int ret, exit_status = OPAL_SUCCESS; opal_pmix_base_component_t *best_component = NULL; opal_pmix_base_module_t *best_module = NULL; @@ -40,16 +39,9 @@ int opal_pmix_base_select(void) /* Save the winner */ opal_pmix = *best_module; - /* Initialize the winner */ - if (OPAL_SUCCESS != (ret = opal_pmix.init()) ) { - /* connection not available is okay - just means - * that a server hasn't already been defined */ - if (OPAL_ERR_SERVER_NOT_AVAIL == ret) { - exit_status = OPAL_SUCCESS; - } else { - exit_status = ret; - } - } + /* do not initialize the module here as the type + * of process determines which init (client or server) + * should be done */ - return exit_status; + return OPAL_SUCCESS; } diff --git a/opal/mca/pmix/cray/pmix_cray.c b/opal/mca/pmix/cray/pmix_cray.c index e996f590cc..43f412df83 100644 --- a/opal/mca/pmix/cray/pmix_cray.c +++ b/opal/mca/pmix/cray/pmix_cray.c @@ -32,54 +32,53 @@ #include #include "opal/mca/pmix/base/base.h" +#include "opal/mca/pmix/base/pmix_base_hash.h" #include "pmix_cray.h" static int cray_init(void); static int cray_fini(void); -static bool cray_initialized(void); -static int cray_abort(int flag, const char msg[]); -static int cray_spawn(int count, const char * cmds[], - int argcs[], const char ** argvs[], - const int maxprocs[], - opal_list_t *info_keyval_vector, - opal_list_t *preput_keyval_vector, - char jobId[], int jobIdSize, - int errors[]); -static int cray_job_connect(const char jobId[]); -static int cray_job_disconnect(const char jobId[]); +static int cray_initialized(void); +static int cray_abort(int flat, const char *msg, + opal_list_t *procs); +static int cray_spawn(opal_list_t *apps, opal_jobid_t *jobid); +static int cray_job_connect(opal_list_t *procs); +static int cray_job_disconnect(opal_list_t *procs); static int cray_put(opal_pmix_scope_t scope, opal_value_t *kv); -static int cray_fence(opal_process_name_t 
*procs, size_t nprocs); +static int cray_fence(opal_list_t *procs, int collect_data); +static int cray_commit(void); static int cray_get(const opal_process_name_t *id, const char *key, opal_value_t **kv); -static int cray_publish(const char service_name[], - opal_list_t *info, - const char port[]); -static int cray_lookup(const char service_name[], - opal_list_t *info, - char port[], int portLen); -static int cray_unpublish(const char service_name[], - opal_list_t *info); +static int cray_publish(opal_pmix_data_range_t scope, + opal_pmix_persistence_t persist, + opal_list_t *info); +static int cray_lookup(opal_pmix_data_range_t scope, + opal_list_t *data); +static int cray_unpublish(opal_pmix_data_range_t scope, char **keys); +static int cray_store_local(const opal_process_name_t *proc, + opal_value_t *val); +#if 0 static bool cray_get_attr(const char *attr, opal_value_t **kv); +#endif const opal_pmix_base_module_t opal_pmix_cray_module = { - cray_init, - cray_fini, - cray_initialized, - cray_abort, - cray_fence, - NULL, - cray_put, - cray_get, - NULL, - cray_publish, - cray_lookup, - cray_unpublish, - cray_get_attr, - NULL, - cray_spawn, - cray_job_connect, - cray_job_disconnect + .init = cray_init, + .finalize = cray_fini, + .initialized = cray_initialized, + .abort = cray_abort, + .fence = cray_fence, + .put = cray_put, + .get = cray_get, + .commit = cray_commit, + .publish = cray_publish, + .lookup = cray_lookup, + .unpublish = cray_unpublish, + .spawn = cray_spawn, + .connect = cray_job_connect, + .disconnect = cray_job_disconnect, + .register_errhandler = opal_pmix_base_register_handler, + .deregister_errhandler = opal_pmix_base_deregister_handler, + .store_local = cray_store_local }; // usage accounting @@ -120,11 +119,18 @@ static int cray_init(void) char buf[PMI2_MAX_ATTRVALUE]; int found; uint32_t jobfam; + opal_value_t kv; + opal_process_name_t ldr; + char nmtmp[64]; + char *str, **localranks = NULL; ++pmix_init_count; /* if we can't startup PMI, we 
can't be used */ if ( PMI2_Initialized () ) { + opal_output_verbose(10, opal_pmix_base_framework.framework_output, + "%s pmix:cray: pmi already initialized", + OPAL_NAME_PRINT(pmix_pname)); return OPAL_SUCCESS; } size = -1; @@ -149,19 +155,13 @@ static int cray_init(void) pmix_vallen_threshold = PMI2_MAX_VALLEN * 3; pmix_vallen_threshold >>= 2; - rc = PMI2_Info_GetJobAttr("universeSize", buf, 16, &found); - if( PMI_SUCCESS != rc ) { - OPAL_PMI_ERROR(rc, "PMI_Get_universe_size"); - goto err_exit; - } - pmix_usize = atoi(buf); - pmix_kvs_name = (char*)malloc(pmix_kvslen_max); if( pmix_kvs_name == NULL ){ PMI2_Finalize(); ret = OPAL_ERR_OUT_OF_RESOURCE; goto err_exit; } + rc = PMI2_Job_GetId(pmix_kvs_name, pmix_kvslen_max); if( PMI_SUCCESS != rc ) { OPAL_PMI_ERROR(rc, "PMI2_Job_GetId"); @@ -170,7 +170,9 @@ static int cray_init(void) rc = sscanf(pmix_kvs_name,"kvs_%u",&jobfam); if (rc != 1) { - OPAL_PMI_ERROR(rc, "PMI2_Job_GetId"); + opal_output_verbose(10, opal_pmix_base_framework.framework_output, + "%s pmix:cray: pmix_kvs_name %s", + OPAL_NAME_PRINT(pmix_pname), pmix_kvs_name); rc = OPAL_ERROR; goto err_exit; } @@ -210,15 +212,137 @@ static int cray_init(void) free(pmapping); - /* find ourselves */ + // setup hash table + opal_pmix_base_hash_init(); + + /* save the job size */ + OBJ_CONSTRUCT(&kv, opal_value_t); + kv.key = strdup(OPAL_PMIX_JOB_SIZE); + kv.type = OPAL_UINT32; + kv.data.uint32 = pmix_size; + if (OPAL_SUCCESS != (rc = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) { + OPAL_ERROR_LOG(rc); + OBJ_DESTRUCT(&kv); + goto err_exit; + } + OBJ_DESTRUCT(&kv); + + /* save the appnum */ + OBJ_CONSTRUCT(&kv, opal_value_t); + kv.key = strdup(OPAL_PMIX_APPNUM); + kv.type = OPAL_UINT32; + kv.data.uint32 = pmix_appnum; + if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) { + OPAL_ERROR_LOG(ret); + OBJ_DESTRUCT(&kv); + goto err_exit; + } + OBJ_DESTRUCT(&kv); + + rc = PMI2_Info_GetJobAttr("universeSize", buf, 16, &found); + if( PMI_SUCCESS != rc 
) { + OPAL_PMI_ERROR(rc, "PMI_Get_universe_size"); + goto err_exit; + } + + pmix_usize = atoi(buf); + + OBJ_CONSTRUCT(&kv, opal_value_t); + kv.key = strdup(OPAL_PMIX_UNIV_SIZE); + kv.type = OPAL_UINT32; + kv.data.uint32 = pmix_usize; + if (OPAL_SUCCESS != (rc = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) { + OPAL_ERROR_LOG(rc); + OBJ_DESTRUCT(&kv); + goto err_exit; + } + OBJ_DESTRUCT(&kv); + + OBJ_CONSTRUCT(&kv, opal_value_t); + kv.key = strdup(OPAL_PMIX_JOBID); + kv.type = OPAL_UINT32; + kv.data.uint32 = pmix_jobid; + if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) { + OPAL_ERROR_LOG(ret); + OBJ_DESTRUCT(&kv); + goto err_exit; + } + OBJ_DESTRUCT(&kv); + + /* save the local size */ + OBJ_CONSTRUCT(&kv, opal_value_t); + kv.key = strdup(OPAL_PMIX_LOCAL_SIZE); + kv.type = OPAL_UINT16; + kv.data.uint16 = pmix_nlranks; + if (OPAL_SUCCESS != (rc = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) { + OPAL_ERROR_LOG(rc); + OBJ_DESTRUCT(&kv); + goto err_exit; + } + OBJ_DESTRUCT(&kv); + + ldr.vpid = pmix_lranks[0]; + ldr.jobid = pmix_pname.jobid; + + /* find ourselves and build up a string for local peer info */ + memset(nmtmp, 0, 64); for (i=0; i < pmix_nlranks; i++) { + ret = snprintf(nmtmp, 64, "%d", pmix_lranks[i]); + opal_argv_append_nosize(&localranks, nmtmp); if (pmix_rank == pmix_lranks[i]) { pmix_lrank = i; pmix_nrank = i; - break; } } + str = opal_argv_join(localranks, ','); + opal_argv_free(localranks); + + OBJ_CONSTRUCT(&kv, opal_value_t); + kv.key = strdup(OPAL_PMIX_LOCAL_PEERS); + kv.type = OPAL_STRING; + kv.data.string = str; + if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) { + OPAL_ERROR_LOG(ret); + OBJ_DESTRUCT(&kv); + goto err_exit; + } + OBJ_DESTRUCT(&kv); + + /* save the local leader */ + OBJ_CONSTRUCT(&kv, opal_value_t); + kv.key = strdup(OPAL_PMIX_LOCALLDR); + kv.type = OPAL_UINT64; + kv.data.uint64 = *(uint64_t*)&ldr; + if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) { 
+ OPAL_ERROR_LOG(ret); + OBJ_DESTRUCT(&kv); + goto err_exit; + } + OBJ_DESTRUCT(&kv); /* release the local-leader key before kv is re-constructed */ + /* save our local rank */ + OBJ_CONSTRUCT(&kv, opal_value_t); + kv.key = strdup(OPAL_PMIX_LOCAL_RANK); + kv.type = OPAL_UINT16; + kv.data.uint16 = pmix_lrank; + if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) { + OPAL_ERROR_LOG(ret); + OBJ_DESTRUCT(&kv); + goto err_exit; + } + OBJ_DESTRUCT(&kv); /* release the local-rank key before kv is re-constructed */ + /* and our node rank */ + OBJ_CONSTRUCT(&kv, opal_value_t); + kv.key = strdup(OPAL_PMIX_NODE_RANK); + kv.type = OPAL_UINT16; + kv.data.uint16 = pmix_nrank; + if (OPAL_SUCCESS != (ret = opal_pmix_base_store(&OPAL_PROC_MY_NAME, &kv))) { + OPAL_ERROR_LOG(ret); + OBJ_DESTRUCT(&kv); + goto err_exit; + } + OBJ_DESTRUCT(&kv); + return OPAL_SUCCESS; err_exit: PMI2_Finalize(); @@ -248,36 +372,32 @@ static int cray_fini(void) { return OPAL_SUCCESS; } -static bool cray_initialized(void) +static int cray_initialized(void) { if (0 < pmix_init_count) { - return true; + return 1; } - return false; + return 0; } -static int cray_abort(int status, const char *msg) +static int cray_abort(int flag, const char *msg, + opal_list_t *procs) { - return PMI2_Abort(status, msg); + PMI2_Abort(flag, msg); + return OPAL_SUCCESS; } -static int cray_spawn(int count, const char * cmds[], - int argcs[], const char ** argvs[], - const int maxprocs[], - opal_list_t *info_keyval_vector, - opal_list_t *preput_keyval_vector, - char jobId[], int jobIdSize, - int errors[]) +static int cray_spawn(opal_list_t *apps, opal_jobid_t *jobid) { return OPAL_ERR_NOT_IMPLEMENTED; } -static int cray_job_connect(const char jobId[]) +static int cray_job_connect(opal_list_t *procs) { return OPAL_ERR_NOT_IMPLEMENTED; } -static int cray_job_disconnect(const char jobId[]) +static int cray_job_disconnect(opal_list_t *procs) { return OPAL_ERR_NOT_IMPLEMENTED; } @@ -290,6 +410,11 @@ static int cray_put(opal_pmix_scope_t scope, opal_output_verbose(10, opal_pmix_base_framework.framework_output, "%s pmix:cray cray_put key %s scope %d\n",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), kv->key, scope); + + if (!pmix_init_count) { + return OPAL_ERROR; + } + /* * for now just always just global cache */ @@ -309,7 +434,12 @@ static int cray_put(opal_pmix_scope_t scope, return rc; } -static int cray_fence(opal_process_name_t *procs, size_t nprocs) +static int cray_commit(void) +{ + return OPAL_SUCCESS; +} + +static int cray_fence(opal_list_t *procs, int collect_data) { int rc, cnt; int32_t i; @@ -331,13 +461,19 @@ static int cray_fence(opal_process_name_t *procs, size_t nprocs) opal_hwloc_locality_t locality; opal_list_t vals; char *cpuset = NULL; + opal_process_name_t pname; opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s pmix:cray executing fence on %u procs cache_global %p cache_local %p", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), (unsigned int)nprocs, + "%s pmix:cray executing fence cache_global %p cache_local %p", + OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), (void *)mca_pmix_cray_component.cache_global, (void *)mca_pmix_cray_component.cache_local); + /* get the modex data from each local process and set the + * localities to avoid having the MPI layer fetch data + * for every process in the job */ + pname.jobid = OPAL_PROC_MY_NAME.jobid; + /* * "unload" the cache_local/cache_global buffers, first copy * it so we can continue to use the local buffers if further @@ -429,8 +565,7 @@ static int cray_fence(opal_process_name_t *procs, size_t nprocs) opal_output_verbose(20, opal_pmix_base_framework.framework_output, "%s pmix:cray unpacked kp with key %s type(%d) for id %s", OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), kp->key, kp->type, OPAL_NAME_PRINT(id)); - if (OPAL_SUCCESS != (rc = opal_dstore.store(opal_dstore_internal, - &id, kp))) { + if (OPAL_SUCCESS != (rc = opal_pmix_base_store(&id, kp))) { OPAL_ERROR_LOG(rc); goto fn_exit; } @@ -452,8 +587,8 @@ static int cray_fence(opal_process_name_t *procs, size_t nprocs) #if OPAL_HAVE_HWLOC /* fetch my cpuset */ OBJ_CONSTRUCT(&vals, opal_list_t); - if (OPAL_SUCCESS 
== (rc = opal_dstore.fetch(opal_dstore_internal, &pmix_pname, - OPAL_DSTORE_CPUSET, &vals))) { + if (OPAL_SUCCESS == (rc = opal_pmix_base_fetch(&pmix_pname, + OPAL_PMIX_CPUSET, &vals))) { kp = (opal_value_t*)opal_list_get_first(&vals); cpuset = strdup(kp->data.string); } else { @@ -474,8 +609,8 @@ static int cray_fence(opal_process_name_t *procs, size_t nprocs) /* fetch cpuset for this vpid */ #if OPAL_HAVE_HWLOC OBJ_CONSTRUCT(&vals, opal_list_t); - if (OPAL_SUCCESS != (rc = opal_dstore.fetch(opal_dstore_internal, &pmix_pname, - OPAL_DSTORE_CPUSET, &vals))) { + if (OPAL_SUCCESS != (rc = opal_pmix_base_fetch(&id, + OPAL_PMIX_CPUSET, &vals))) { opal_output_verbose(2, opal_pmix_base_framework.framework_output, "%s cpuset for local proc %s not found", OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), @@ -510,10 +645,10 @@ static int cray_fence(opal_process_name_t *procs, size_t nprocs) opal_hwloc_base_print_locality(locality))); OBJ_CONSTRUCT(&kvn, opal_value_t); - kvn.key = strdup(OPAL_DSTORE_LOCALITY); + kvn.key = strdup(OPAL_PMIX_LOCALITY); kvn.type = OPAL_UINT16; kvn.data.uint16 = locality; - (void)opal_dstore.store(opal_dstore_internal, &id, &kvn); + opal_pmix_base_store(&id, &kvn); /* key the locality by the peer id, as the cpuset fetch above does; pname only has its jobid set in the visible code */ OBJ_DESTRUCT(&kvn); } @@ -546,7 +681,7 @@ static int cray_get(const opal_process_name_t *id, const char *key, opal_value_t OPAL_NAME_PRINT(*id), key); OBJ_CONSTRUCT(&vals, opal_list_t); - rc = opal_dstore.fetch(opal_dstore_internal, id, key, &vals); + rc = opal_pmix_base_fetch(id, key, &vals); if (OPAL_SUCCESS == rc) { *kv = (opal_value_t*)opal_list_remove_first(&vals); return OPAL_SUCCESS; @@ -558,53 +693,35 @@ static int cray_get(const opal_process_name_t *id, const char *key, opal_value_t OPAL_LIST_DESTRUCT(&vals); return rc; + } -static int cray_publish(const char service_name[], - opal_list_t *info, - const char port[]) +static int cray_publish(opal_pmix_data_range_t scope, + opal_pmix_persistence_t persist, + opal_list_t *info) { - int rc; - - if (PMI_SUCCESS != (rc =
PMI2_Nameserv_publish(service_name, NULL, port))) { - OPAL_PMI_ERROR(rc, "PMI2_Nameserv_publish"); - return OPAL_ERROR; - } - return OPAL_SUCCESS; + return OPAL_ERR_NOT_IMPLEMENTED; } -static int cray_lookup(const char service_name[], - opal_list_t *info, - char port[], int portLen) +static int cray_lookup(opal_pmix_data_range_t scope, + opal_list_t *data) { - int rc; - - if (PMI_SUCCESS != (rc = PMI2_Nameserv_lookup(service_name, NULL, port, portLen))) { - OPAL_PMI_ERROR(rc, "PMI2_Nameserv_lookup"); - return OPAL_ERROR; - } - - return OPAL_SUCCESS; + return OPAL_ERR_NOT_IMPLEMENTED; } -static int cray_unpublish(const char service_name[], - opal_list_t *info) +static int cray_unpublish(opal_pmix_data_range_t scope, char **keys) { - int rc; - - if (PMI_SUCCESS != (rc = PMI2_Nameserv_unpublish(service_name, NULL))) { - OPAL_PMI_ERROR(rc, "PMI2_Nameserv_unpublish"); - return OPAL_ERROR; - } - return OPAL_SUCCESS;; + return OPAL_ERR_NOT_IMPLEMENTED; } +#if 0 + static bool cray_get_attr(const char *attr, opal_value_t **kv) { int rc, i; opal_value_t *kp; - if (0 == strcmp(PMIX_JOBID, attr)) { + if (0 == strcmp(OPAL_PMIX_JOBID, attr)) { kp = OBJ_NEW(opal_value_t); kp->key = strdup(attr); kp->type = OPAL_UINT32; @@ -613,7 +730,7 @@ static bool cray_get_attr(const char *attr, opal_value_t **kv) return true; } - if (0 == strcmp(PMIX_RANK, attr)) { + if (0 == strcmp(OPAL_PMIX_RANK, attr)) { rc = PMI_Get_rank(&i); if( PMI_SUCCESS != rc ) { OPAL_PMI_ERROR(rc, "PMI_Get_rank"); @@ -627,7 +744,7 @@ static bool cray_get_attr(const char *attr, opal_value_t **kv) return true; } - if (0 == strcmp(PMIX_UNIV_SIZE, attr)) { + if (0 == strcmp(OPAL_PMIX_UNIV_SIZE, attr)) { rc = PMI_Get_universe_size(&i); if( PMI_SUCCESS != rc ) { OPAL_PMI_ERROR(rc, "PMI_Get_universe_size"); @@ -641,7 +758,7 @@ static bool cray_get_attr(const char *attr, opal_value_t **kv) return true; } - if (0 == strcmp(PMIX_JOB_SIZE, attr)) { + if (0 == strcmp(OPAL_PMIX_JOB_SIZE, attr)) { rc = PMI_Get_size(&i); if( 
PMI_SUCCESS != rc ) { OPAL_PMI_ERROR(rc, "PMI_Get_size"); @@ -656,7 +773,7 @@ static bool cray_get_attr(const char *attr, opal_value_t **kv) } - if (0 == strcmp(PMIX_APPNUM, attr)) { + if (0 == strcmp(OPAL_PMIX_APPNUM, attr)) { rc = PMI_Get_appnum(&i); if( PMI_SUCCESS != rc ) { OPAL_PMI_ERROR(rc, "PMI_Get_appnum"); @@ -670,7 +787,7 @@ static bool cray_get_attr(const char *attr, opal_value_t **kv) return true; } - if (0 == strcmp(PMIX_LOCAL_RANK, attr)) { + if (0 == strcmp(OPAL_PMIX_LOCAL_RANK, attr)) { kp = OBJ_NEW(opal_value_t); kp->key = strdup(attr); kp->type = OPAL_UINT32; @@ -679,7 +796,7 @@ static bool cray_get_attr(const char *attr, opal_value_t **kv) return true; } - if (0 == strcmp(PMIX_NODE_RANK, attr)) { + if (0 == strcmp(OPAL_PMIX_NODE_RANK, attr)) { kp = OBJ_NEW(opal_value_t); kp->key = strdup(attr); kp->type = OPAL_UINT32; @@ -688,7 +805,7 @@ static bool cray_get_attr(const char *attr, opal_value_t **kv) return true; } - if (0 == strcmp(PMIX_LOCAL_SIZE, attr)) { + if (0 == strcmp(OPAL_PMIX_LOCAL_SIZE, attr)) { kp = OBJ_NEW(opal_value_t); kp->key = strdup(attr); kp->type = OPAL_UINT32; @@ -699,6 +816,15 @@ static bool cray_get_attr(const char *attr, opal_value_t **kv) return OPAL_ERR_NOT_IMPLEMENTED; } +#endif + +static int cray_store_local(const opal_process_name_t *proc, + opal_value_t *val) +{ + opal_pmix_base_store(proc, val); + + return OPAL_SUCCESS; +} static char* pmix_error(int pmix_err) { diff --git a/opal/mca/pmix/cray/pmix_cray_component.c b/opal/mca/pmix/cray/pmix_cray_component.c index 9749e95a4f..20d13ea113 100644 --- a/opal/mca/pmix/cray/pmix_cray_component.c +++ b/opal/mca/pmix/cray/pmix_cray_component.c @@ -88,9 +88,10 @@ static int pmix_cray_component_query(mca_base_module_t **module, int *priority) FILE *fd = NULL, *fd_task_is_app = NULL; char task_is_app_fname[PATH_MAX]; - /* disqualify ourselves if not running in a Cray PAGG container */ + /* disqualify ourselves if not running in a Cray PAGG container, or we + were launched by the 
orte/mpirun launcher */ fd = fopen(proc_job_file, "r"); - if (fd == NULL) { + if ((fd == NULL) || (getenv("OMPI_NO_USE_CRAY_PMI") != NULL)) { *priority = 0; *module = NULL; rc = OPAL_ERROR; diff --git a/opal/mca/pmix/native/.opal_ignore b/opal/mca/pmix/native/.opal_ignore new file mode 100644 index 0000000000..e69de29bb2 diff --git a/opal/mca/pmix/native/Makefile.am b/opal/mca/pmix/native/Makefile.am deleted file mode 100644 index ff14157b7c..0000000000 --- a/opal/mca/pmix/native/Makefile.am +++ /dev/null @@ -1,36 +0,0 @@ -# -# Copyright (c) 2014 Intel, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -sources = \ - pmix_native.h \ - pmix_native_component.c \ - pmix_native.c \ - usock.c \ - usock_sendrecv.c - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). - -if MCA_BUILD_opal_pmix_native_DSO -component_noinst = -component_install = mca_pmix_native.la -else -component_noinst = libmca_pmix_native.la -component_install = -endif - -mcacomponentdir = $(opallibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_pmix_native_la_SOURCES = $(sources) -mca_pmix_native_la_LDFLAGS = -module -avoid-version - -noinst_LTLIBRARIES = $(component_noinst) -libmca_pmix_native_la_SOURCES =$(sources) -libmca_pmix_native_la_LDFLAGS = -module -avoid-version diff --git a/opal/mca/pmix/native/configure.m4 b/opal/mca/pmix/native/configure.m4 deleted file mode 100644 index cde17cdb84..0000000000 --- a/opal/mca/pmix/native/configure.m4 +++ /dev/null @@ -1,42 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. 
-# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2011-2013 Los Alamos National Security, LLC. -# All rights reserved. -# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved. -# Copyright (c) 2013 Intel, Inc. All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# MCA_pmix_native_CONFIG([action-if-found], [action-if-not-found]) -# ----------------------------------------------------------- -AC_DEFUN([MCA_opal_pmix_native_CONFIG],[ - AC_CONFIG_FILES([opal/mca/pmix/native/Makefile]) - - # check for sockaddr_un (a good sign we have Unix domain sockets) - AC_CHECK_TYPES([struct sockaddr_un], - [pmix_native_happy="yes"], - [pmix_native_happy="no"], - [AC_INCLUDES_DEFAULT -#ifdef HAVE_SYS_SOCKET_H -#include -#endif -#ifdef HAVE_SYS_UN_H -#include -#endif]) - - AS_IF([test "$pmix_native_happy" = "yes"], [$1], [$2]) -])dnl diff --git a/opal/mca/pmix/native/pmix_native.h b/opal/mca/pmix/native/pmix_native.h deleted file mode 100644 index a3757c154e..0000000000 --- a/opal/mca/pmix/native/pmix_native.h +++ /dev/null @@ -1,232 +0,0 @@ -/* - * Copyright (c) 2014-2015 Intel, Inc. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef MCA_PMIX_NATIVE_H -#define MCA_PMIX_NATIVE_H - -#include "opal_config.h" - -#ifdef HAVE_SYS_SOCKET_H -#include -#endif -#ifdef HAVE_SYS_UN_H -#include -#endif - -#include "opal/mca/mca.h" -#include "opal/mca/event/event.h" -#include "opal/errhandler/opal_errhandler.h" -#include "opal/util/proc.h" - -#include "opal/mca/pmix/base/base.h" - -BEGIN_C_DECLS - -/** - * the state of the connection to the server - */ -typedef enum { - PMIX_USOCK_UNCONNECTED, - PMIX_USOCK_CLOSED, - PMIX_USOCK_RESOLVE, - PMIX_USOCK_CONNECTING, - PMIX_USOCK_CONNECT_ACK, - PMIX_USOCK_CONNECTED, - PMIX_USOCK_FAILED, - PMIX_USOCK_ACCEPTING -} pmix_usock_state_t; - -/* define a macro for abnormal termination */ -#define PMIX_NATIVE_ABNORMAL_TERM \ - do { \ - mca_pmix_native_component.state = PMIX_USOCK_FAILED; \ - opal_invoke_errhandler(OPAL_ERR_COMM_FAILURE, NULL); \ - } while(0); - -/* define a command type for communicating to the - * pmix server */ -typedef uint8_t pmix_cmd_t; -#define PMIX_CMD_T OPAL_UINT8 - -/* define some commands */ -#define PMIX_ABORT_CMD 1 -#define PMIX_FENCE_CMD 2 -#define PMIX_FENCENB_CMD 3 -#define PMIX_PUT_CMD 4 -#define PMIX_GET_CMD 5 -#define PMIX_GETNB_CMD 6 -#define PMIX_FINALIZE_CMD 7 -#define PMIX_GETATTR_CMD 8 - -/* define some message types */ -#define PMIX_USOCK_IDENT 1 -#define PMIX_USOCK_USER 2 - -/* internally used cbfunc */ -typedef void (*pmix_usock_cbfunc_t)(opal_buffer_t *buf, void *cbdata); - -/* header for messages */ -typedef struct { - opal_process_name_t id; - uint8_t type; - uint32_t tag; - size_t nbytes; -} pmix_usock_hdr_t; - -/* usock structure for sending a message */ -typedef struct { - opal_list_item_t super; - opal_event_t ev; - pmix_usock_hdr_t hdr; - char *data; - bool hdr_sent; - char *sdptr; - size_t sdbytes; -} pmix_usock_send_t; -OBJ_CLASS_DECLARATION(pmix_usock_send_t); - -/* usock structure for recving a message */ -typedef struct { - 
opal_list_item_t super; - opal_event_t ev; - pmix_usock_hdr_t hdr; - char *data; - bool hdr_recvd; - char *rdptr; - size_t rdbytes; -} pmix_usock_recv_t; -OBJ_CLASS_DECLARATION(pmix_usock_recv_t); - -/* usock struct for posting send/recv request */ -typedef struct { - opal_object_t super; - opal_event_t ev; - opal_buffer_t *bfr; - pmix_usock_cbfunc_t cbfunc; - void *cbdata; -} pmix_usock_sr_t; -OBJ_CLASS_DECLARATION(pmix_usock_sr_t); - -/* usock structure for tracking posted recvs */ -typedef struct { - opal_list_item_t super; - opal_event_t ev; - uint32_t tag; - pmix_usock_cbfunc_t cbfunc; - void *cbdata; -} pmix_usock_posted_recv_t; -OBJ_CLASS_DECLARATION(pmix_usock_posted_recv_t); - - -/* usock struct for tracking ops */ -typedef struct { - opal_object_t super; - opal_event_t ev; - volatile bool active; - opal_buffer_t data; - opal_pmix_cbfunc_t cbfunc; - void *cbdata; -} pmix_cb_t; -OBJ_CLASS_DECLARATION(pmix_cb_t); - - -typedef struct { - opal_pmix_base_component_t super; - opal_buffer_t *cache_local; - opal_buffer_t *cache_remote; - opal_buffer_t *cache_global; - opal_event_base_t *evbase; - opal_process_name_t id; - opal_process_name_t server; - char *uri; - struct sockaddr_un address; - int sd; - int max_retries; - int retries; // number of times we have tried to connect to this address - pmix_usock_state_t state; - opal_event_t op_event; // used for connecting and operations other than read/write - uint32_t tag; // current tag - opal_event_t send_event; // registration with event thread for send events - bool send_ev_active; - opal_event_t recv_event; // registration with event thread for recv events - bool recv_ev_active; - opal_event_t timer_event; // timer for retrying connection failures - bool timer_ev_active; - opal_list_t send_queue; // list of pmix_usock_sent_t to be sent - pmix_usock_send_t *send_msg; // current send in progress - pmix_usock_recv_t *recv_msg; // current recv in progress - opal_list_t posted_recvs; // list of 
pmix_usock_posted_recv_t -} opal_pmix_native_component_t; - -OPAL_DECLSPEC extern opal_pmix_native_component_t mca_pmix_native_component; - -OPAL_DECLSPEC extern const opal_pmix_base_module_t opal_pmix_native_module; - - -/* module-level shared functions */ -OPAL_MODULE_DECLSPEC void pmix_usock_process_msg(int fd, short flags, void *cbdata); -OPAL_MODULE_DECLSPEC void pmix_usock_send_recv(int fd, short args, void *cbdata); -OPAL_MODULE_DECLSPEC void pmix_usock_send_handler(int sd, short flags, void *cbdata); -OPAL_MODULE_DECLSPEC void pmix_usock_recv_handler(int sd, short flags, void *cbdata); -OPAL_MODULE_DECLSPEC char* pmix_usock_state_print(pmix_usock_state_t state); -OPAL_MODULE_DECLSPEC void pmix_usock_dump(const char* msg); -OPAL_MODULE_DECLSPEC int usock_send_connect_ack(void); - - -/* internal convenience macros */ -#define PMIX_ACTIVATE_SEND_RECV(b, cb, d) \ - do { \ - pmix_usock_sr_t *ms; \ - opal_output_verbose(5, opal_pmix_base_framework.framework_output, \ - "%s [%s:%d] post send to server", \ - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), \ - __FILE__, __LINE__); \ - ms = OBJ_NEW(pmix_usock_sr_t); \ - ms->bfr = (b); \ - ms->cbfunc = (cb); \ - ms->cbdata = (d); \ - opal_event_set(mca_pmix_native_component.evbase, &((ms)->ev), -1, \ - OPAL_EV_WRITE, pmix_usock_send_recv, (ms)); \ - opal_event_set_priority(&((ms)->ev), OPAL_EV_MSG_LO_PRI); \ - opal_event_active(&((ms)->ev), OPAL_EV_WRITE, 1); \ - } while(0); - -#define PMIX_ACTIVATE_POST_MSG(ms) \ - do { \ - opal_output_verbose(5, opal_pmix_base_framework.framework_output, \ - "%s [%s:%d] post msg", \ - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), \ - __FILE__, __LINE__); \ - opal_event_set(mca_pmix_native_component.evbase, &ms->ev, -1, \ - OPAL_EV_WRITE, \ - pmix_usock_process_msg, ms); \ - opal_event_set_priority(&ms->ev, OPAL_EV_MSG_LO_PRI); \ - opal_event_active(&ms->ev, OPAL_EV_WRITE, 1); \ - } while(0); - -#define CLOSE_THE_SOCKET(socket) \ - do { \ - if (0 <= socket) { \ - shutdown(socket, 2); \ - close(socket); \ 
- socket = -1; \ - } \ - } while(0) - - -#define PMIX_WAIT_FOR_COMPLETION(a) \ - do { \ - while ((a)) { \ - usleep(10); \ - } \ - } while (0); - -END_C_DECLS - -#endif /* MCA_PMIX_NATIVE_H */ diff --git a/opal/mca/pmix/native/pmix_native_component.c b/opal/mca/pmix/native/pmix_native_component.c deleted file mode 100644 index 8b10adf897..0000000000 --- a/opal/mca/pmix/native/pmix_native_component.c +++ /dev/null @@ -1,139 +0,0 @@ -/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ -/* - * Copyright (c) 2014 Intel, Inc. All rights reserved. - * Copyright (c) 2014 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * Copyright (c) 2015 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - * These symbols are in a file by themselves to provide nice linker - * semantics. Since linkers generally pull in symbols by object - * files, keeping these symbols as the only symbols in this file - * prevents utility programs such as "ompi_info" from having to import - * entire components just to query their version and parameters. 
- */ - -#include "opal_config.h" - -#include "opal/constants.h" -#include "opal/util/proc.h" -#include "opal/mca/pmix/pmix.h" -#include "pmix_native.h" - -/* - * Public string showing the pmix native component version number - */ -const char *opal_pmix_native_component_version_string = - "OPAL native pmix MCA component version " OPAL_VERSION; - -/* - * Local function - */ -static int pmix_native_open(void); -static int pmix_native_close(void); -static int pmix_native_component_query(mca_base_module_t **module, int *priority); - - -/* - * Instantiate the public struct with all of our public information - * and pointers to our public functions in it - */ - -opal_pmix_native_component_t mca_pmix_native_component = { - { - - /* First, the mca_component_t struct containing meta information - about the component itself */ - - .base_version = { - /* Indicate that we are a pmix v1.1.0 component (which also - implies a specific MCA version) */ - - OPAL_PMIX_BASE_VERSION_2_0_0, - - /* Component name and version */ - - .mca_component_name = "native", - MCA_BASE_MAKE_VERSION(component, OPAL_MAJOR_VERSION, OPAL_MINOR_VERSION, - OPAL_RELEASE_VERSION), - - /* Component open and close functions */ - - .mca_open_component = pmix_native_open, - .mca_close_component = pmix_native_close, - .mca_query_component = pmix_native_component_query, - }, - /* Next the MCA v1.0.0 component meta data */ - .base_data = { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - } - } -}; - -static int pmix_native_open(void) -{ - /* construct the component fields */ - mca_pmix_native_component.cache_local = NULL; - mca_pmix_native_component.cache_remote = NULL; - mca_pmix_native_component.cache_global = NULL; - mca_pmix_native_component.evbase = NULL; - mca_pmix_native_component.id = opal_name_invalid; - mca_pmix_native_component.server = opal_name_invalid; - mca_pmix_native_component.uri = NULL; - memset(&mca_pmix_native_component.address, 0, sizeof(struct sockaddr_un)); - 
mca_pmix_native_component.sd = -1; - mca_pmix_native_component.max_retries = 10; - mca_pmix_native_component.state = PMIX_USOCK_UNCONNECTED; - mca_pmix_native_component.tag = 0; - mca_pmix_native_component.send_ev_active = false; - mca_pmix_native_component.recv_ev_active = false; - mca_pmix_native_component.timer_ev_active = false; - OBJ_CONSTRUCT(&mca_pmix_native_component.send_queue, opal_list_t); - mca_pmix_native_component.send_msg = NULL; - mca_pmix_native_component.recv_msg = NULL; - OBJ_CONSTRUCT(&mca_pmix_native_component.posted_recvs, opal_list_t); - - return OPAL_SUCCESS; -} - -static int pmix_native_close(void) -{ - if (NULL != mca_pmix_native_component.uri) { - free(mca_pmix_native_component.uri); - } - OPAL_LIST_DESTRUCT(&mca_pmix_native_component.send_queue); - OPAL_LIST_DESTRUCT(&mca_pmix_native_component.posted_recvs); - return OPAL_SUCCESS; -} - - -static int pmix_native_component_query(mca_base_module_t **module, int *priority) -{ - char *t, *id; - - /* see if a PMIx server is present */ - if (NULL == (t = getenv("PMIX_SERVER_URI")) || - NULL == (id = getenv("PMIX_ID"))) { - /* we still have to be considered because this might - * be a singleton, and even a singleton requires some - * degree of support. 
So set us at a very low priority - * so the other components can be selected it they - * are in a better position to run */ - *priority = 1; - mca_pmix_native_component.uri = NULL; - } else { - /* if PMIx is present, then we need to use it */ - opal_convert_string_to_process_name(&mca_pmix_native_component.id, id); - mca_pmix_native_component.uri = strdup(t); - opal_proc_set_name(&mca_pmix_native_component.id); - *priority = 100; - } - *module = (mca_base_module_t *)&opal_pmix_native_module; - return OPAL_SUCCESS; -} diff --git a/opal/mca/pmix/native/usock.c b/opal/mca/pmix/native/usock.c deleted file mode 100644 index 6de7eacf35..0000000000 --- a/opal/mca/pmix/native/usock.c +++ /dev/null @@ -1,471 +0,0 @@ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2011 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2006-2013 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2015 Intel, Inc. All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - */ - -#include "opal_config.h" -#include "opal/types.h" - -#include -#ifdef HAVE_SYS_UIO_H -#include -#endif -#ifdef HAVE_NET_UIO_H -#include -#endif -#ifdef HAVE_SYS_TYPES_H -#include -#endif -#ifdef HAVE_SYS_STAT_H -#include -#endif -#ifdef HAVE_FCNTL_H -#include -#endif - -#include "opal_stdint.h" -#include "opal/opal_socket_errno.h" -#include "opal/dss/dss.h" -#include "opal/mca/dstore/dstore.h" -#include "opal/mca/sec/sec.h" -#include "opal/runtime/opal.h" -#include "opal/util/show_help.h" -#include "opal/util/error.h" -#include "opal/util/output.h" -#include "opal/util/proc.h" - -#include "opal/mca/pmix/base/base.h" -#include "pmix_native.h" - -static int usock_send_blocking(char *ptr, size_t size); -static void pmix_usock_try_connect(int fd, short args, void *cbdata); - -/* State machine for internal operations */ -typedef struct { - opal_object_t super; - opal_event_t ev; -} pmix_usock_op_t; -static OBJ_CLASS_INSTANCE(pmix_usock_op_t, - opal_object_t, - NULL, NULL); - -#define PMIX_ACTIVATE_USOCK_STATE(cbfunc) \ - do { \ - pmix_usock_op_t *op; \ - op = OBJ_NEW(pmix_usock_op_t); \ - opal_event_set(mca_pmix_native_component.evbase, &op->ev, -1, \ - OPAL_EV_WRITE, (cbfunc), op); \ - opal_event_set_priority(&op->ev, OPAL_EV_MSG_LO_PRI); \ - opal_event_active(&op->ev, OPAL_EV_WRITE, 1); \ - } while(0); - -void pmix_usock_send_recv(int fd, short args, void *cbdata) -{ - pmix_usock_sr_t *ms = (pmix_usock_sr_t*)cbdata; - pmix_usock_posted_recv_t *req; - pmix_usock_send_t *snd; - uint32_t tag = UINT32_MAX; - - if (NULL != ms->cbfunc) { - /* if a callback msg is expected, setup a recv for it */ - req = OBJ_NEW(pmix_usock_posted_recv_t); - /* take the next tag in the sequence */ - if (UINT32_MAX == mca_pmix_native_component.tag) { - mca_pmix_native_component.tag = 0; - } - req->tag = mca_pmix_native_component.tag++; - tag = req->tag; - req->cbfunc = ms->cbfunc; - req->cbdata = ms->cbdata; 
- opal_output_verbose(5, opal_pmix_base_framework.framework_output, - "%s posting recv on tag %d", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), req->tag); - /* add it to the list of recvs - we cannot have unexpected messages - * in this subsystem as the server never sends us something that - * we didn't previously request */ - opal_list_append(&mca_pmix_native_component.posted_recvs, &req->super); - } - - snd = OBJ_NEW(pmix_usock_send_t); - snd->hdr.id = mca_pmix_native_component.id; - snd->hdr.type = PMIX_USOCK_USER; - snd->hdr.tag = tag; - snd->hdr.nbytes = ms->bfr->bytes_used; - snd->data = ms->bfr->base_ptr; - /* always start with the header */ - snd->sdptr = (char*)&snd->hdr; - snd->sdbytes = sizeof(pmix_usock_hdr_t); - - /* add the msg to the send queue if we are already connected*/ - if (PMIX_USOCK_CONNECTED == mca_pmix_native_component.state) { - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s usock:send_nb: already connected to server - queueing for send", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - /* if there is no message on-deck, put this one there */ - if (NULL == mca_pmix_native_component.send_msg) { - mca_pmix_native_component.send_msg = snd; - } else { - /* add it to the queue */ - opal_list_append(&mca_pmix_native_component.send_queue, &snd->super); - } - /* ensure the send event is active */ - if (!mca_pmix_native_component.send_ev_active) { - opal_event_add(&mca_pmix_native_component.send_event, 0); - mca_pmix_native_component.send_ev_active = true; - } - return; - } - - /* add the message to the queue for sending after the - * connection is formed - */ - opal_list_append(&mca_pmix_native_component.send_queue, &snd->super); - - if (PMIX_USOCK_CONNECTING != mca_pmix_native_component.state && - PMIX_USOCK_CONNECT_ACK != mca_pmix_native_component.state) { - /* we have to initiate the connection - again, we do not - * want to block while the connection is created. 
- * So throw us into an event that will create - * the connection via a mini-state-machine :-) - */ - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s usock:send_nb: initiating connection to server", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - mca_pmix_native_component.state = PMIX_USOCK_CONNECTING; - PMIX_ACTIVATE_USOCK_STATE(pmix_usock_try_connect); - } -} - -void pmix_usock_process_msg(int fd, short flags, void *cbdata) -{ - pmix_usock_recv_t *msg = (pmix_usock_recv_t*)cbdata; - pmix_usock_posted_recv_t *rcv; - opal_buffer_t buf; - - OPAL_OUTPUT_VERBOSE((5, opal_pmix_base_framework.framework_output, - "%s message received %d bytes for tag %u", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - (int)msg->hdr.nbytes, msg->hdr.tag)); - - /* see if we have a waiting recv for this message */ - OPAL_LIST_FOREACH(rcv, &mca_pmix_native_component.posted_recvs, pmix_usock_posted_recv_t) { - opal_output_verbose(5, opal_pmix_base_framework.framework_output, - "%s checking msg on tag %u for tag %u", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - msg->hdr.tag, rcv->tag); - - if (msg->hdr.tag == rcv->tag) { - if (NULL != rcv->cbfunc) { - /* construct and load the buffer */ - OBJ_CONSTRUCT(&buf, opal_buffer_t); - if (NULL != msg->data) { - opal_dss.load(&buf, msg->data, msg->hdr.nbytes); - } - msg->data = NULL; // protect the data region - if (NULL != rcv->cbfunc) { - rcv->cbfunc(&buf, rcv->cbdata); - } - OBJ_DESTRUCT(&buf); // free's the msg data - /* also done with the recv */ - opal_list_remove_item(&mca_pmix_native_component.posted_recvs, &rcv->super); - OBJ_RELEASE(rcv); - OBJ_RELEASE(msg); - return; - } - } - } - - /* we get here if no matching recv was found - this is an error */ - opal_output(0, "%s UNEXPECTED MESSAGE", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - PMIX_NATIVE_ABNORMAL_TERM; // report the error upstream - OBJ_RELEASE(msg); -} - -/* - * Try connecting to a peer - */ -static void pmix_usock_try_connect(int fd, short args, void *cbdata) -{ - int rc, flags; - 
opal_socklen_t addrlen = 0; - - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s usock_peer_try_connect: attempting to connect to server", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - - addrlen = sizeof(struct sockaddr_un); - - while (mca_pmix_native_component.retries < mca_pmix_native_component.max_retries) { - mca_pmix_native_component.retries++; - /* Create the new socket */ - mca_pmix_native_component.sd = socket(PF_UNIX, SOCK_STREAM, 0); - if (mca_pmix_native_component.sd < 0) { - opal_output(0, "pmix:create_socket: socket() failed: %s (%d)\n", - strerror(opal_socket_errno), - opal_socket_errno); - continue; - } - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "usock_peer_try_connect: attempting to connect to server on socket %d", - mca_pmix_native_component.sd); - /* try to connect */ - if (connect(mca_pmix_native_component.sd, (struct sockaddr*)&mca_pmix_native_component.address, addrlen) < 0) { - if (opal_socket_errno == ETIMEDOUT) { - /* The server may be too busy to accept new connections, - * so cycle around and let it try again */ - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "timeout connecting to server"); - CLOSE_THE_SOCKET(mca_pmix_native_component.sd); - continue; - } - - /* Some kernels (Linux 2.6) will automatically software - abort a connection that was ECONNREFUSED on the last - attempt, without even trying to establish the - connection. 
Handle that case in a semi-rational - way by trying again before giving up */ - if (ECONNABORTED == opal_socket_errno) { - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "connection to server aborted by OS - retrying"); - CLOSE_THE_SOCKET(mca_pmix_native_component.sd); - continue; - } - } - /* otherwise, the connect succeeded - so break out of the loop */ - break; - } - - if (mca_pmix_native_component.retries == mca_pmix_native_component.max_retries || - mca_pmix_native_component.sd < 0){ - /* We were unsuccessful in establishing this connection, and are - * not likely to suddenly become successful */ - opal_output(0, "pmix:create_socket: connection to server failed"); - if (0 <= mca_pmix_native_component.sd) { - CLOSE_THE_SOCKET(mca_pmix_native_component.sd); - } - PMIX_NATIVE_ABNORMAL_TERM; // report the error upstream - return; - } - - /* connection succeeded */ - mca_pmix_native_component.retries = 0; - - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s sock_peer_try_connect: Connection across to server succeeded", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - - /* setup event callbacks */ - opal_event_set(mca_pmix_native_component.evbase, - &mca_pmix_native_component.recv_event, - mca_pmix_native_component.sd, - OPAL_EV_READ|OPAL_EV_PERSIST, - pmix_usock_recv_handler, NULL); - opal_event_set_priority(&mca_pmix_native_component.recv_event, OPAL_EV_MSG_LO_PRI); - mca_pmix_native_component.recv_ev_active = false; - - opal_event_set(mca_pmix_native_component.evbase, - &mca_pmix_native_component.send_event, - mca_pmix_native_component.sd, - OPAL_EV_WRITE|OPAL_EV_PERSIST, - pmix_usock_send_handler, NULL); - opal_event_set_priority(&mca_pmix_native_component.send_event, OPAL_EV_MSG_LO_PRI); - mca_pmix_native_component.send_ev_active = false; - - /* setup the socket as non-blocking */ - if ((flags = fcntl(mca_pmix_native_component.sd, F_GETFL, 0)) < 0) { - opal_output(0, "usock_peer_connect: fcntl(F_GETFL) failed: %s (%d)\n", - 
strerror(opal_socket_errno), - opal_socket_errno); - } else { - flags |= O_NONBLOCK; - if (fcntl(mca_pmix_native_component.sd, F_SETFL, flags) < 0) - opal_output(0, "usock_peer_connect: fcntl(F_SETFL) failed: %s (%d)\n", - strerror(opal_socket_errno), - opal_socket_errno); - } - - /* setup our recv to catch the return ack call */ - if (!mca_pmix_native_component.recv_ev_active) { - opal_event_add(&mca_pmix_native_component.recv_event, 0); - mca_pmix_native_component.recv_ev_active = true; - } - - /* send our globally unique process identifier to the server */ - if (OPAL_SUCCESS == (rc = usock_send_connect_ack())) { - mca_pmix_native_component.state = PMIX_USOCK_CONNECT_ACK; - } else { - opal_output(0, - "%s usock_peer_try_connect: " - "usock_send_connect_ack to server failed: %s (%d)", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - opal_strerror(rc), rc); - CLOSE_THE_SOCKET(mca_pmix_native_component.sd); - PMIX_NATIVE_ABNORMAL_TERM; // report the error upstream - return; - } -} - -int usock_send_connect_ack(void) -{ - char *msg; - pmix_usock_hdr_t hdr; - int rc; - size_t sdsize; - char *cred; - size_t credsize; - - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s SEND CONNECT ACK", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - - /* setup the header */ - hdr.id = OPAL_PROC_MY_NAME; - hdr.tag = UINT32_MAX; - hdr.type = PMIX_USOCK_IDENT; - - /* get our security credential */ - if (OPAL_SUCCESS != (rc = opal_sec.get_my_credential(NULL, opal_dstore_internal, &OPAL_PROC_MY_NAME, &cred, &credsize))) { - return rc; - } - - /* set the number of bytes to be read beyond the header */ - hdr.nbytes = strlen(opal_version_string) + 1 + credsize; - - /* create a space for our message */ - sdsize = (sizeof(hdr) + strlen(opal_version_string) + 1 + credsize); - if (NULL == (msg = (char*)malloc(sdsize))) { - return OPAL_ERR_OUT_OF_RESOURCE; - } - memset(msg, 0, sdsize); - - /* load the message */ - memcpy(msg, &hdr, sizeof(hdr)); - memcpy(msg+sizeof(hdr), 
opal_version_string, strlen(opal_version_string)); - memcpy(msg+sizeof(hdr)+strlen(opal_version_string)+1, cred, credsize); - if (NULL != cred) { - free(cred); - } - - if (OPAL_SUCCESS != usock_send_blocking(msg, sdsize)) { - free(msg); - return OPAL_ERR_UNREACH; - } - free(msg); - return OPAL_SUCCESS; -} - -/* - * A blocking send on a non-blocking socket. Used to send the small amount of connection - * information that identifies the peers endpoint. - */ -static int usock_send_blocking(char *ptr, size_t size) -{ - size_t cnt = 0; - int retval; - - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s send blocking of %"PRIsize_t" bytes to socket %d", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - size, mca_pmix_native_component.sd); - - while (cnt < size) { - retval = send(mca_pmix_native_component.sd, (char*)ptr+cnt, size-cnt, 0); - if (retval < 0) { - if (opal_socket_errno != EINTR && opal_socket_errno != EAGAIN && opal_socket_errno != EWOULDBLOCK) { - opal_output(0, "%s usock_peer_send_blocking: send() to socket %d failed: %s (%d)\n", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - mca_pmix_native_component.sd, - strerror(opal_socket_errno), - opal_socket_errno); - mca_pmix_native_component.state = PMIX_USOCK_FAILED; - CLOSE_THE_SOCKET(mca_pmix_native_component.sd); - return OPAL_ERR_UNREACH; - } - continue; - } - cnt += retval; - } - - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s blocking send complete to socket %d", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - mca_pmix_native_component.sd); - - return OPAL_SUCCESS; -} - -/* - * Routine for debugging to print the connection state and socket options - */ -void pmix_usock_dump(const char* msg) -{ - char buff[255]; - int nodelay,flags; - - if ((flags = fcntl(mca_pmix_native_component.sd, F_GETFL, 0)) < 0) { - opal_output(0, "%s usock_peer_dump: fcntl(F_GETFL) failed: %s (%d)\n", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - strerror(opal_socket_errno), - opal_socket_errno); - } - -#if 
defined(USOCK_NODELAY) - optlen = sizeof(nodelay); - if (getsockopt(mca_pmix_native_component.sd, IPPROTO_USOCK, USOCK_NODELAY, (char *)&nodelay, &optlen) < 0) { - opal_output(0, "%s usock_peer_dump: USOCK_NODELAY option: %s (%d)\n", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - strerror(opal_socket_errno), - opal_socket_errno); - } -#else - nodelay = 0; -#endif - - snprintf(buff, sizeof(buff), "%s %s: nodelay %d flags %08x\n", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - msg, nodelay, flags); - opal_output(0, "%s", buff); -} - -char* pmix_usock_state_print(pmix_usock_state_t state) -{ - switch (state) { - case PMIX_USOCK_UNCONNECTED: - return "UNCONNECTED"; - case PMIX_USOCK_CLOSED: - return "CLOSED"; - case PMIX_USOCK_RESOLVE: - return "RESOLVE"; - case PMIX_USOCK_CONNECTING: - return "CONNECTING"; - case PMIX_USOCK_CONNECT_ACK: - return "ACK"; - case PMIX_USOCK_CONNECTED: - return "CONNECTED"; - case PMIX_USOCK_FAILED: - return "FAILED"; - default: - return "UNKNOWN"; - } -} - diff --git a/opal/mca/pmix/native/usock_sendrecv.c b/opal/mca/pmix/native/usock_sendrecv.c deleted file mode 100644 index fa63f7844c..0000000000 --- a/opal/mca/pmix/native/usock_sendrecv.c +++ /dev/null @@ -1,758 +0,0 @@ -/* - * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2011 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2006-2013 Los Alamos National Security, LLC. - * All rights reserved. - * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2015 Intel, Inc. 
All rights reserved. - * Copyright (c) 2014 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "opal_config.h" - -#ifdef HAVE_UNISTD_H -#include -#endif -#include -#ifdef HAVE_SYS_UIO_H -#include -#endif -#ifdef HAVE_NET_UIO_H -#include -#endif -#ifdef HAVE_SYS_TYPES_H -#include -#endif -#ifdef HAVE_NETINET_IN_H -#include -#endif -#ifdef HAVE_ARPA_INET_H -#include -#endif -#ifdef HAVE_NETINET_TCP_H -#include -#endif - -#include "opal_stdint.h" -#include "opal/types.h" -#include "opal/runtime/opal.h" -#include "opal/opal_socket_errno.h" -#include "opal/mca/backtrace/backtrace.h" -#include "opal/util/output.h" -#include "opal/util/net.h" -#include "opal/util/error.h" -#include "opal/class/opal_hash_table.h" -#include "opal/mca/event/event.h" -#include "opal/mca/sec/sec.h" - -#include "opal/mca/pmix/base/base.h" -#include "pmix_native.h" - -static void usock_complete_connect(void); -static int usock_recv_connect_ack(void); - -static int send_bytes(pmix_usock_send_t *msg) -{ - int rc; - - while (0 < msg->sdbytes) { - rc = write(mca_pmix_native_component.sd, msg->sdptr, msg->sdbytes); - if (rc < 0) { - if (opal_socket_errno == EINTR) { - continue; - } else if (opal_socket_errno == EAGAIN) { - /* tell the caller to keep this message on active, - * but let the event lib cycle so other messages - * can progress while this socket is busy - */ - return OPAL_ERR_RESOURCE_BUSY; - } else if (opal_socket_errno == EWOULDBLOCK) { - /* tell the caller to keep this message on active, - * but let the event lib cycle so other messages - * can progress while this socket is busy - */ - return OPAL_ERR_WOULD_BLOCK; - } - /* we hit an error and cannot progress this message */ - opal_output(0, "%s pmix_usock_msg_send_bytes: write failed: %s (%d) [sd = %d]", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - strerror(opal_socket_errno), - opal_socket_errno, - 
mca_pmix_native_component.sd); - return OPAL_ERR_COMM_FAILURE; - } - /* update location */ - msg->sdbytes -= rc; - msg->sdptr += rc; - } - /* we sent the full data block */ - return OPAL_SUCCESS; -} - -/* - * A file descriptor is available/ready for send. Check the state - * of the socket and take the appropriate action. - */ -void pmix_usock_send_handler(int sd, short flags, void *cbdata) -{ - pmix_usock_send_t *msg = mca_pmix_native_component.send_msg; - int rc; - - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s usock:send_handler called to send to server", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - - switch (mca_pmix_native_component.state) { - case PMIX_USOCK_CONNECTING: - case PMIX_USOCK_CLOSED: - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "usock:send_handler %s", - pmix_usock_state_print(mca_pmix_native_component.state)); - usock_complete_connect(); - /* de-activate the send event until the connection - * handshake completes - */ - if (mca_pmix_native_component.send_ev_active) { - opal_event_del(&mca_pmix_native_component.send_event); - mca_pmix_native_component.send_ev_active = false; - } - break; - case PMIX_USOCK_CONNECTED: - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s usock:send_handler SENDING TO SERVER with %s msg", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - (NULL == msg) ? 
"NULL" : "NON-NULL"); - if (NULL != msg) { - if (!msg->hdr_sent) { - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s usock:send_handler SENDING HEADER", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - if (OPAL_SUCCESS == (rc = send_bytes(msg))) { - /* header is completely sent */ - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s usock:send_handler HEADER SENT", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - msg->hdr_sent = true; - /* setup to send the data */ - if (NULL == msg->data) { - /* this was a zero-byte msg - nothing more to do */ - OBJ_RELEASE(msg); - mca_pmix_native_component.send_msg = NULL; - goto next; - } else { - /* send the data as a single block */ - msg->sdptr = msg->data; - msg->sdbytes = msg->hdr.nbytes; - } - /* fall thru and let the send progress */ - } else if (OPAL_ERR_RESOURCE_BUSY == rc || - OPAL_ERR_WOULD_BLOCK == rc) { - /* exit this event and let the event lib progress */ - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s usock:send_handler RES BUSY OR WOULD BLOCK", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - return; - } else { - // report the error - opal_output(0, "%s pmix_usock_peer_send_handler: unable to send message ON SOCKET %d", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - mca_pmix_native_component.sd); - opal_event_del(&mca_pmix_native_component.send_event); - mca_pmix_native_component.send_ev_active = false; - OBJ_RELEASE(msg); - mca_pmix_native_component.send_msg = NULL; - CLOSE_THE_SOCKET(mca_pmix_native_component.sd); - PMIX_NATIVE_ABNORMAL_TERM; // report the error upstream - return; - } - } - - if (msg->hdr_sent) { - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s usock:send_handler SENDING BODY OF MSG", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - if (OPAL_SUCCESS == (rc = send_bytes(msg))) { - // message is complete - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s usock:send_handler BODY SENT", - 
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - OBJ_RELEASE(msg); - mca_pmix_native_component.send_msg = NULL; - goto next; - } else if (OPAL_ERR_RESOURCE_BUSY == rc || - OPAL_ERR_WOULD_BLOCK == rc) { - /* exit this event and let the event lib progress */ - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s usock:send_handler RES BUSY OR WOULD BLOCK", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - return; - } else { - // report the error - opal_output(0, "%s pmix_usock_peer_send_handler: unable to send message ON SOCKET %d", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - mca_pmix_native_component.sd); - opal_event_del(&mca_pmix_native_component.send_event); - mca_pmix_native_component.send_ev_active = false; - OBJ_RELEASE(msg); - mca_pmix_native_component.send_msg = NULL; - CLOSE_THE_SOCKET(mca_pmix_native_component.sd); - PMIX_NATIVE_ABNORMAL_TERM; // report the error upstream - return; - } - } - - next: - /* if current message completed - progress any pending sends by - * moving the next in the queue into the "on-deck" position. Note - * that this doesn't mean we send the message right now - we will - * wait for another send_event to fire before doing so. This gives - * us a chance to service any pending recvs. 
- */ - mca_pmix_native_component.send_msg = (pmix_usock_send_t*) - opal_list_remove_first(&mca_pmix_native_component.send_queue); - } - - /* if nothing else to do unregister for send event notifications */ - if (NULL == mca_pmix_native_component.send_msg && - mca_pmix_native_component.send_ev_active) { - opal_event_del(&mca_pmix_native_component.send_event); - mca_pmix_native_component.send_ev_active = false; - } - break; - - default: - opal_output(0, "%s pmix_usock_peer_send_handler: invalid connection state (%d) on socket %d", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - mca_pmix_native_component.state, mca_pmix_native_component.sd); - if (mca_pmix_native_component.send_ev_active) { - opal_event_del(&mca_pmix_native_component.send_event); - mca_pmix_native_component.send_ev_active = false; - } - CLOSE_THE_SOCKET(mca_pmix_native_component.sd); - PMIX_NATIVE_ABNORMAL_TERM; // report the error upstream - break; - } -} - -static int read_bytes(pmix_usock_recv_t* recv) -{ - int rc; - - /* read until all bytes recvd or error */ - while (0 < recv->rdbytes) { - rc = read(mca_pmix_native_component.sd, recv->rdptr, recv->rdbytes); - if (rc < 0) { - if(opal_socket_errno == EINTR) { - continue; - } else if (opal_socket_errno == EAGAIN) { - /* tell the caller to keep this message on active, - * but let the event lib cycle so other messages - * can progress while this socket is busy - */ - return OPAL_ERR_RESOURCE_BUSY; - } else if (opal_socket_errno == EWOULDBLOCK) { - /* tell the caller to keep this message on active, - * but let the event lib cycle so other messages - * can progress while this socket is busy - */ - return OPAL_ERR_WOULD_BLOCK; - } - /* we hit an error and cannot progress this message - report - * the error back to the RML and let the caller know - * to abort this message - */ - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s pmix_usock_msg_recv: readv failed: %s (%d)", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - strerror(opal_socket_errno), - 
opal_socket_errno); - return OPAL_ERR_COMM_FAILURE; - } else if (rc == 0) { - /* the remote peer closed the connection - report that condition - * and let the caller know - */ - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s pmix_usock_msg_recv: peer closed connection", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - /* stop all events */ - if (mca_pmix_native_component.recv_ev_active) { - opal_event_del(&mca_pmix_native_component.recv_event); - mca_pmix_native_component.recv_ev_active = false; - } - if (mca_pmix_native_component.timer_ev_active) { - opal_event_del(&mca_pmix_native_component.timer_event); - mca_pmix_native_component.timer_ev_active = false; - } - if (mca_pmix_native_component.send_ev_active) { - opal_event_del(&mca_pmix_native_component.send_event); - mca_pmix_native_component.send_ev_active = false; - } - if (NULL != mca_pmix_native_component.recv_msg) { - OBJ_RELEASE(mca_pmix_native_component.recv_msg); - mca_pmix_native_component.recv_msg = NULL; - } - CLOSE_THE_SOCKET(mca_pmix_native_component.sd); - return OPAL_ERR_WOULD_BLOCK; - } - /* we were able to read something, so adjust counters and location */ - recv->rdbytes -= rc; - recv->rdptr += rc; - } - - /* we read the full data block */ - return OPAL_SUCCESS; -} - -/* - * Dispatch to the appropriate action routine based on the state - * of the connection with the peer. - */ - -void pmix_usock_recv_handler(int sd, short flags, void *cbdata) -{ - int rc; - - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s usock:recv:handler called", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - - switch (mca_pmix_native_component.state) { - case PMIX_USOCK_CONNECT_ACK: - if (OPAL_SUCCESS == (rc = usock_recv_connect_ack())) { - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s usock:recv:handler starting send/recv events", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - /* we connected! 
Start the send/recv events */ - if (!mca_pmix_native_component.recv_ev_active) { - opal_event_add(&mca_pmix_native_component.recv_event, 0); - mca_pmix_native_component.recv_ev_active = true; - } - if (mca_pmix_native_component.timer_ev_active) { - opal_event_del(&mca_pmix_native_component.timer_event); - mca_pmix_native_component.timer_ev_active = false; - } - /* if there is a message waiting to be sent, queue it */ - if (NULL == mca_pmix_native_component.send_msg) { - mca_pmix_native_component.send_msg = (pmix_usock_send_t*)opal_list_remove_first(&mca_pmix_native_component.send_queue); - } - if (NULL != mca_pmix_native_component.send_msg && !mca_pmix_native_component.send_ev_active) { - opal_event_add(&mca_pmix_native_component.send_event, 0); - mca_pmix_native_component.send_ev_active = true; - } - /* update our state */ - mca_pmix_native_component.state = PMIX_USOCK_CONNECTED; - } else { - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s UNABLE TO COMPLETE CONNECT ACK WITH SERVER", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - opal_event_del(&mca_pmix_native_component.recv_event); - mca_pmix_native_component.recv_ev_active = false; - CLOSE_THE_SOCKET(mca_pmix_native_component.sd); - PMIX_NATIVE_ABNORMAL_TERM; // report the error upstream - return; - } - break; - case PMIX_USOCK_CONNECTED: - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s usock:recv:handler CONNECTED", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - /* allocate a new message and setup for recv */ - if (NULL == mca_pmix_native_component.recv_msg) { - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s usock:recv:handler allocate new recv msg", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - mca_pmix_native_component.recv_msg = OBJ_NEW(pmix_usock_recv_t); - if (NULL == mca_pmix_native_component.recv_msg) { - opal_output(0, "%s usock_recv_handler: unable to allocate recv message\n", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - 
CLOSE_THE_SOCKET(mca_pmix_native_component.sd); - PMIX_NATIVE_ABNORMAL_TERM; // report the error upstream - return; - } - /* start by reading the header */ - mca_pmix_native_component.recv_msg->rdptr = (char*)&mca_pmix_native_component.recv_msg->hdr; - mca_pmix_native_component.recv_msg->rdbytes = sizeof(pmix_usock_hdr_t); - } - /* if the header hasn't been completely read, read it */ - if (!mca_pmix_native_component.recv_msg->hdr_recvd) { - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "usock:recv:handler read hdr"); - if (OPAL_SUCCESS == (rc = read_bytes(mca_pmix_native_component.recv_msg))) { - /* completed reading the header */ - mca_pmix_native_component.recv_msg->hdr_recvd = true; - /* if this is a zero-byte message, then we are done */ - if (0 == mca_pmix_native_component.recv_msg->hdr.nbytes) { - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s RECVD ZERO-BYTE MESSAGE FROM SERVER for tag %d", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - mca_pmix_native_component.recv_msg->hdr.tag); - mca_pmix_native_component.recv_msg->data = NULL; // make sure - mca_pmix_native_component.recv_msg->rdptr = NULL; - mca_pmix_native_component.recv_msg->rdbytes = 0; - } else { - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s usock:recv:handler allocate data region of size %lu", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - (unsigned long)mca_pmix_native_component.recv_msg->hdr.nbytes); - /* allocate the data region */ - mca_pmix_native_component.recv_msg->data = (char*)malloc(mca_pmix_native_component.recv_msg->hdr.nbytes); - /* point to it */ - mca_pmix_native_component.recv_msg->rdptr = mca_pmix_native_component.recv_msg->data; - mca_pmix_native_component.recv_msg->rdbytes = mca_pmix_native_component.recv_msg->hdr.nbytes; - } - /* fall thru and attempt to read the data */ - } else if (OPAL_ERR_RESOURCE_BUSY == rc || - OPAL_ERR_WOULD_BLOCK == rc) { - /* exit this event and let the event lib progress */ - return; - } 
else { - /* close the connection */ - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s usock:recv:handler error reading bytes - closing connection", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - CLOSE_THE_SOCKET(mca_pmix_native_component.sd); - PMIX_NATIVE_ABNORMAL_TERM; // report the error upstream - return; - } - } - - if (mca_pmix_native_component.recv_msg->hdr_recvd) { - /* continue to read the data block - we start from - * wherever we left off, which could be at the - * beginning or somewhere in the message - */ - if (OPAL_SUCCESS == (rc = read_bytes(mca_pmix_native_component.recv_msg))) { - /* we recvd all of the message */ - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s RECVD COMPLETE MESSAGE FROM SERVER OF %d BYTES FOR TAG %d", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - (int)mca_pmix_native_component.recv_msg->hdr.nbytes, - mca_pmix_native_component.recv_msg->hdr.tag); - /* post it for delivery */ - PMIX_ACTIVATE_POST_MSG(mca_pmix_native_component.recv_msg); - mca_pmix_native_component.recv_msg = NULL; - } else if (OPAL_ERR_RESOURCE_BUSY == rc || - OPAL_ERR_WOULD_BLOCK == rc) { - /* exit this event and let the event lib progress */ - return; - } else { - // report the error - opal_output(0, "%s usock_peer_recv_handler: unable to recv message", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - /* turn off the recv event */ - opal_event_del(&mca_pmix_native_component.recv_event); - mca_pmix_native_component.recv_ev_active = false; - CLOSE_THE_SOCKET(mca_pmix_native_component.sd); - PMIX_NATIVE_ABNORMAL_TERM; // report the error upstream - return; - } - } - break; - default: - opal_output(0, "%s usock_peer_recv_handler: invalid socket state(%d)", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - mca_pmix_native_component.state); - CLOSE_THE_SOCKET(mca_pmix_native_component.sd); - PMIX_NATIVE_ABNORMAL_TERM; // report the error upstream - break; - } -} - -/* - * A blocking recv on a non-blocking socket. 
Used to receive the small amount of connection - * information that identifies the peers endpoint. - */ -static bool usock_recv_blocking(char *data, size_t size) -{ - size_t cnt = 0; - - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s waiting for connect ack from server", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - - while (cnt < size) { - int retval = recv(mca_pmix_native_component.sd, (char *)data+cnt, size-cnt, 0); - - /* remote closed connection */ - if (retval == 0) { - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s usock_recv_blocking: server closed connection: state %d", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - mca_pmix_native_component.state); - mca_pmix_native_component.state = PMIX_USOCK_CLOSED; - CLOSE_THE_SOCKET(mca_pmix_native_component.sd); - return false; - } - - /* socket is non-blocking so handle errors */ - if (retval < 0) { - if (opal_socket_errno != EINTR && - opal_socket_errno != EAGAIN && - opal_socket_errno != EWOULDBLOCK) { - if (mca_pmix_native_component.state == PMIX_USOCK_CONNECT_ACK) { - /* If we overflow the listen backlog, it's - possible that even though we finished the three - way handshake, the remote host was unable to - transition the connection from half connected - (received the initial SYN) to fully connected - (in the listen backlog). We likely won't see - the failure until we try to receive, due to - timing and the like. The first thing we'll get - in that case is a RST packet, which receive - will turn into a connection reset by peer - errno. 
In that case, leave the socket in - CONNECT_ACK and propogate the error up to - recv_connect_ack, who will try to establish the - connection again */ - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s connect ack received error %s from server", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - strerror(opal_socket_errno)); - return false; - } else { - opal_output(0, - "%s usock_recv_blocking: " - "recv() failed for server: %s (%d)\n", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - strerror(opal_socket_errno), - opal_socket_errno); - mca_pmix_native_component.state = PMIX_USOCK_FAILED; - CLOSE_THE_SOCKET(mca_pmix_native_component.sd); - return false; - } - } - continue; - } - cnt += retval; - } - - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s connect ack received from server", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - return true; -} - - -/* - * Receive the peers globally unique process identification from a newly - * connected socket and verify the expected response. If so, move the - * socket to a connected state. 
- */ -static int usock_recv_connect_ack(void) -{ - char *msg; - char *version; - int rc; - char *cred; - size_t credsize; - pmix_usock_hdr_t hdr; - - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s RECV CONNECT ACK FROM SERVER ON SOCKET %d", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - mca_pmix_native_component.sd); - - /* ensure all is zero'd */ - memset(&hdr, 0, sizeof(pmix_usock_hdr_t)); - - if (usock_recv_blocking((char*)&hdr, sizeof(pmix_usock_hdr_t))) { - /* If the state is CONNECT_ACK, then we were waiting for - * the connection to be ack'd - */ - if (mca_pmix_native_component.state != PMIX_USOCK_CONNECT_ACK) { - /* handshake broke down - abort this connection */ - opal_output(0, "%s RECV CONNECT BAD HANDSHAKE FROM SERVER ON SOCKET %d", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - mca_pmix_native_component.sd); - mca_pmix_native_component.state = PMIX_USOCK_FAILED; - CLOSE_THE_SOCKET(mca_pmix_native_component.sd); - return OPAL_ERR_UNREACH; - } - } else { - /* unable to complete the recv */ - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s unable to complete recv of connect-ack from server ON SOCKET %d", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - mca_pmix_native_component.sd); - return OPAL_ERR_UNREACH; - } - - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s connect-ack recvd from server", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - - /* compare the servers name to the expected value */ - if (0 != opal_compare_proc(hdr.id, mca_pmix_native_component.server)) { - opal_output(0, "usock_peer_recv_connect_ack: " - "%s received unexpected process identifier (%s) from server: expected (%s)", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - OPAL_NAME_PRINT(hdr.id), - OPAL_NAME_PRINT(mca_pmix_native_component.server)); - mca_pmix_native_component.state = PMIX_USOCK_FAILED; - CLOSE_THE_SOCKET(mca_pmix_native_component.sd); - return OPAL_ERR_UNREACH; - } - - opal_output_verbose(2, opal_pmix_base_framework.framework_output, 
- "%s connect-ack header from server is okay", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - - /* get the authentication and version payload */ - if (NULL == (msg = (char*)malloc(hdr.nbytes))) { - mca_pmix_native_component.state = PMIX_USOCK_FAILED; - CLOSE_THE_SOCKET(mca_pmix_native_component.sd); - return OPAL_ERR_OUT_OF_RESOURCE; - } - if (!usock_recv_blocking(msg, hdr.nbytes)) { - /* unable to complete the recv */ - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s unable to complete recv of connect-ack from server ON SOCKET %d", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - mca_pmix_native_component.sd); - free(msg); - return OPAL_ERR_UNREACH; - } - - /* check that this is from a matching version */ - version = (char*)(msg); - if (0 != strcmp(version, opal_version_string)) { - opal_output(0, "usock_peer_recv_connect_ack: " - "%s received different version from server: %s instead of %s", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - version, opal_version_string); - mca_pmix_native_component.state = PMIX_USOCK_FAILED; - CLOSE_THE_SOCKET(mca_pmix_native_component.sd); - free(msg); - return OPAL_ERR_UNREACH; - } - - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s connect-ack version from server matches ours", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - - /* check security token */ - cred = (char*)(msg + strlen(version) + 1); - credsize = hdr.nbytes - strlen(version) - 1; - if (OPAL_SUCCESS != (rc = opal_sec.authenticate(cred, credsize, NULL))) { - OPAL_ERROR_LOG(rc); - mca_pmix_native_component.state = PMIX_USOCK_FAILED; - CLOSE_THE_SOCKET(mca_pmix_native_component.sd); - free(msg); - return OPAL_ERR_UNREACH; - } - free(msg); - - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s connect-ack from server authenticated", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - - /* connected */ - mca_pmix_native_component.state = PMIX_USOCK_CONNECTED; - /* initiate send of first message on queue */ - if (NULL == 
mca_pmix_native_component.send_msg) { - mca_pmix_native_component.send_msg = (pmix_usock_send_t*) - opal_list_remove_first(&mca_pmix_native_component.send_queue); - } - if (NULL != mca_pmix_native_component.send_msg && !mca_pmix_native_component.send_ev_active) { - opal_event_add(&mca_pmix_native_component.send_event, 0); - mca_pmix_native_component.send_ev_active = true; - } - if (2 <= opal_output_get_verbosity(opal_pmix_base_framework.framework_output)) { - pmix_usock_dump("connected"); - } - return OPAL_SUCCESS; -} - - -/* - * Check the status of the connection. If the connection failed, will retry - * later. Otherwise, send this process' identifier to the server on the - * newly connected socket. - */ -static void usock_complete_connect(void) -{ - int so_error = 0; - opal_socklen_t so_length = sizeof(so_error); - - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s usock:complete_connect called for server on socket %d", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - mca_pmix_native_component.sd); - - /* check connect completion status */ - if (getsockopt(mca_pmix_native_component.sd, SOL_SOCKET, SO_ERROR, (char *)&so_error, &so_length) < 0) { - opal_output(0, "%s usock_peer_complete_connect: getsockopt() to server failed: %s (%d)\n", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - strerror(opal_socket_errno), - opal_socket_errno); - mca_pmix_native_component.state = PMIX_USOCK_FAILED; - CLOSE_THE_SOCKET(mca_pmix_native_component.sd); - PMIX_NATIVE_ABNORMAL_TERM; - return; - } - - if (so_error == EINPROGRESS) { - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s usock:send:handler still in progress", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - return; - } else if (so_error == ECONNREFUSED || so_error == ETIMEDOUT) { - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s usock_peer_complete_connect: connection to server failed: %s (%d)", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - strerror(so_error), - so_error); - 
CLOSE_THE_SOCKET(mca_pmix_native_component.sd); - PMIX_NATIVE_ABNORMAL_TERM; // report the error upstream - return; - } else if (so_error != 0) { - /* No need to worry about the return code here - we return regardless - at this point, and if an error did occur a message has already been - printed for the user */ - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s usock_peer_complete_connect: " - "connection to server failed with error %d", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), - so_error); - CLOSE_THE_SOCKET(mca_pmix_native_component.sd); - PMIX_NATIVE_ABNORMAL_TERM; // report the error upstream - return; - } - - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s usock_peer_complete_connect: sending ack to server", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - - if (usock_send_connect_ack() == OPAL_SUCCESS) { - mca_pmix_native_component.state = PMIX_USOCK_CONNECT_ACK; - opal_output_verbose(2, opal_pmix_base_framework.framework_output, - "%s usock_peer_complete_connect: setting read event on connection to server", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - - if (!mca_pmix_native_component.recv_ev_active) { - opal_event_add(&mca_pmix_native_component.recv_event, 0); - mca_pmix_native_component.recv_ev_active = true; - } - } else { - opal_output(0, "%s usock_complete_connect: unable to send connect ack to server", - OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)); - CLOSE_THE_SOCKET(mca_pmix_native_component.sd); - PMIX_NATIVE_ABNORMAL_TERM; // report the error upstream - } -} - diff --git a/opal/mca/pmix/pmix.h b/opal/mca/pmix/pmix.h index 951565c8db..b525ad2529 100644 --- a/opal/mca/pmix/pmix.h +++ b/opal/mca/pmix/pmix.h @@ -16,130 +16,48 @@ #include "opal_config.h" #include "opal/types.h" +#ifdef HAVE_SYS_UN_H +#include <sys/un.h> +#endif + #include "opal/mca/mca.h" #include "opal/mca/event/event.h" #include "opal/dss/dss.h" #include "opal/runtime/opal.h" -#include "opal/mca/dstore/dstore.h" +#include "opal/dss/dss.h" #include "opal/util/error.h"
#include "opal/util/proc.h" +#include "opal/mca/pmix/pmix_types.h" +#include "opal/mca/pmix/pmix_server.h" + BEGIN_C_DECLS -/* define some maximum sizes */ -#define PMIX_MAX_VALLEN 1024 -#define PMIX_MAX_INFO_KEY 255 -#define PMIX_MAX_INFO_VAL 1024 - -/* define an INFO object corresponding to - * the MPI_Info structure */ -typedef struct { - opal_list_item_t super; - char key[PMIX_MAX_INFO_KEY]; - char value[PMIX_MAX_INFO_VAL]; -} pmix_info_t; -OBJ_CLASS_DECLARATION(pmix_info_t); - -/* define a scope for data "put" by PMI per the following: - * - * PMI_LOCAL - the data is intended only for other application - * processes on the same node. Data marked in this way - * will not be included in data packages sent to remote requestors - * PMI_REMOTE - the data is intended solely for applications processes on - * remote nodes. Data marked in this way will not be shared with - * other processes on the same node - * PMI_GLOBAL - the data is to be shared with all other requesting processes, - * regardless of location - */ -typedef uint8_t opal_pmix_scope_t; -#define PMIX_SCOPE_T OPAL_UINT8 -#define PMIX_SCOPE_UNDEF 0 -#define PMIX_INTERNAL 1 // data used internally only -#define PMIX_LOCAL 2 // share to procs also on this node -#define PMIX_REMOTE 3 // share with procs not on this node -#define PMIX_GLOBAL 4 // share with all procs (local + remote) - -/* callback function for non-blocking operations */ -typedef void (*opal_pmix_cbfunc_t)(int status, opal_value_t *kv, void *cbdata); - -/* flags to indicate if the modex value being pushed into - * the PMIx server comes from an element that is ready to - * support async modex operations, or from one that requires - * synchronous modex (i.e., blocking modex operation) */ -#define PMIX_SYNC_REQD true -#define PMIX_ASYNC_RDY false - -/* define a set of "standard" PMIx attributes that can - * be queried. 
Implementations (and users) are free to extend as - * desired, so the get_attr functions need to be capable - * of handling the "not found" condition. Note that these - * are attributes of the system and the job as opposed to - * values the application (or underlying MPI library) - * might choose to expose - i.e., they are values provided - * by the resource manager as opposed to the application */ -#define PMIX_ATTR_UNDEF NULL - -#define PMIX_CPUSET "pmix.cpuset" // (char*) hwloc bitmap applied to proc upon launch -#define PMIX_CREDENTIAL "pmix.cred" // (opal_byte_object*) security credential assigned to proc -#define PMIX_HOSTNAME "pmix.hname" // (char*) name of the host this proc is on -/* scratch directory locations for use by applications */ -#define PMIX_TMPDIR "pmix.tmpdir" // (char*) top-level tmp dir assigned to session -/* information about relative ranks as assigned */ -#define PMIX_JOBID "pmix.jobid" // (char*) jobid assigned by scheduler -#define PMIX_APPNUM "pmix.appnum" // (uint32_t) app number within the job -#define PMIX_RANK "pmix.rank" // (uint32_t) process rank within the job -#define PMIX_GLOBAL_RANK "pmix.grank" // (uint32_t) rank spanning across all jobs in this session -#define PMIX_APP_RANK "pmix.apprank" // (uint32_t) rank within this app -#define PMIX_NPROC_OFFSET "pmix.offset" // (uint32_t) starting global rank of this job -#define PMIX_LOCAL_RANK "pmix.lrank" // (uint16_t) rank on this node within this job -#define PMIX_NODE_RANK "pmix.nrank" // (uint16_t) rank on this node spanning all jobs -#define PMIX_LOCALLDR "pmix.lldr" // (uint64_t) opal_identifier of lowest rank on this node within this job -#define PMIX_APPLDR "pmix.aldr" // (uint32_t) lowest rank in this app within this job -#define PMIX_NODE_ID "pmix.nodeid" // (uint32_t) vpid of daemon hosting specified proc - -/* proc location-related info */ -#define PMIX_PROC_MAP "pmix.map" // (byte_object) packed map of proc locations within this job -#define PMIX_LOCAL_PEERS 
"pmix.lpeers" // (char*) comma-delimited string of ranks on this node within this job -#define PMIX_LOCAL_CPUSETS "pmix.lcpus" // (byte_object) packed names and cpusets of local peers -/* size info */ -#define PMIX_UNIV_SIZE "pmix.univ.size" // (uint32_t) #procs in this namespace -#define PMIX_JOB_SIZE "pmix.job.size" // (uint32_t) #procs in this job -#define PMIX_LOCAL_SIZE "pmix.local.size" // (uint32_t) #procs in this job on this node -#define PMIX_NODE_SIZE "pmix.node.size" // (uint32_t) #procs across all jobs on this node -#define PMIX_MAX_PROCS "pmix.max.size" // (uint32_t) max #procs for this job -/* topology info */ -#define PMIX_NET_TOPO "pmix.ntopo" // (byte_object) network topology -#define PMIX_LOCAL_TOPO "pmix.ltopo" // (hwloc topo) local node topology - /** * Provide a simplified macro for sending data via modex * to other processes. The macro requires four arguments: * * r - the integer return status from the modex op - * f - whether this modex requires sync or is async ready * sc - the PMIX scope of the data * s - the key to tag the data being posted * d - pointer to the data object being posted * t - the type of the data */ -#define OPAL_MODEX_SEND_VALUE(r, f, sc, s, d, t) \ - do { \ - opal_value_t kv; \ - if (PMIX_SYNC_REQD == (f)) { \ - opal_pmix_use_collective = true; \ - } \ - OBJ_CONSTRUCT(&kv, opal_value_t); \ - kv.key = (s); \ - if (OPAL_SUCCESS != ((r) = opal_value_load(&kv, (d), (t)))) { \ - OPAL_ERROR_LOG((r)); \ - } else { \ - if (OPAL_SUCCESS != ((r) = opal_pmix.put(sc, &kv))) { \ - OPAL_ERROR_LOG((r)); \ - } \ - } \ - /* do not destruct the keyval as we don't own */ \ - /* the data - the caller will take care of the */ \ - /* key and value storage, and the kv itself has none */ \ +#define OPAL_MODEX_SEND_VALUE(r, sc, s, d, t) \ + do { \ + opal_value_t _kv; \ + OBJ_CONSTRUCT(&(_kv), opal_value_t); \ + _kv.key = (s); \ + if (OPAL_SUCCESS != ((r) = opal_value_load(&(_kv), (d), (t)))) { \ + OPAL_ERROR_LOG((r)); \ + } else { \ + if 
(OPAL_SUCCESS != ((r) = opal_pmix.put(sc, &(_kv)))) { \ + OPAL_ERROR_LOG((r)); \ + } \ + } \ + /* do not destruct the keyval as we don't own */ \ + /* the data - the caller will take care of the */ \ + /* key and value storage, and the kv itself has none */ \ } while(0); /** @@ -147,29 +65,25 @@ typedef void (*opal_pmix_cbfunc_t)(int status, opal_value_t *kv, void *cbdata); * to other processes. The macro requires four arguments: * * r - the integer return status from the modex op - * f - whether this modex requires sync or is async ready * sc - the PMIX scope of the data * s - the key to tag the data being posted * d - the data object being posted * sz - the number of bytes in the data object */ -#define OPAL_MODEX_SEND_STRING(r, f, sc, s, d, sz) \ - do { \ - opal_value_t kv; \ - if (PMIX_SYNC_REQD == (f)) { \ - opal_pmix_use_collective = true; \ - } \ - OBJ_CONSTRUCT(&kv, opal_value_t); \ - kv.key = (s); \ - kv.type = OPAL_BYTE_OBJECT; \ - kv.data.bo.bytes = (uint8_t*)(d); \ - kv.data.bo.size = (sz); \ - if (OPAL_SUCCESS != ((r) = opal_pmix.put(sc, &kv))) { \ - OPAL_ERROR_LOG((r)); \ - } \ - kv.data.bo.bytes = NULL; /* protect the data */ \ - kv.key = NULL; /* protect the key */ \ - OBJ_DESTRUCT(&kv); \ +#define OPAL_MODEX_SEND_STRING(r, sc, s, d, sz) \ + do { \ + opal_value_t _kv; \ + OBJ_CONSTRUCT(&(_kv), opal_value_t); \ + _kv.key = (s); \ + _kv.type = OPAL_BYTE_OBJECT; \ + _kv.data.bo.bytes = (uint8_t*)(d); \ + _kv.data.bo.size = (sz); \ + if (OPAL_SUCCESS != ((r) = opal_pmix.put(sc, &(_kv)))) { \ + OPAL_ERROR_LOG((r)); \ + } \ + _kv.data.bo.bytes = NULL; /* protect the data */ \ + _kv.key = NULL; /* protect the key */ \ + OBJ_DESTRUCT(&(_kv)); \ } while(0); /** @@ -177,21 +91,17 @@ typedef void (*opal_pmix_cbfunc_t)(int status, opal_value_t *kv, void *cbdata); * to other processes. 
The macro requires four arguments: * * r - the integer return status from the modex op - * f - whether this modex requires sync or is async ready * sc - the PMIX scope of the data * s - the MCA component that is posting the data * d - the data object being posted * sz - the number of bytes in the data object */ -#define OPAL_MODEX_SEND(r, f, sc, s, d, sz) \ +#define OPAL_MODEX_SEND(r, sc, s, d, sz) \ do { \ - char *key; \ - if (PMIX_SYNC_REQD == (f)) { \ - opal_pmix_use_collective = true; \ - } \ - key = mca_base_component_to_string((s)); \ - OPAL_MODEX_SEND_STRING((r), (f), (sc), key, (d), (sz)); \ - free(key); \ + char *_key; \ + _key = mca_base_component_to_string((s)); \ + OPAL_MODEX_SEND_STRING((r), (sc), _key, (d), (sz)); \ + free(_key); \ } while(0); /** @@ -200,21 +110,20 @@ typedef void (*opal_pmix_cbfunc_t)(int status, opal_value_t *kv, void *cbdata); * * r - the integer return status from the modex op (int) * s - string key (char*) - * p - pointer to the opal_proc_t of the proc that posted - * the data (opal_proc_t*) + * p - pointer to the opal_process_name_t of the proc that posted + * the data (opal_process_name_t*) * d - pointer to a location wherein the data object * is to be returned * t - the expected data type */ #define OPAL_MODEX_RECV_VALUE(r, s, p, d, t) \ do { \ - opal_value_t *kv; \ - if (OPAL_SUCCESS != ((r) = opal_pmix.get(&(p)->proc_name, \ - (s), &kv))) { \ + opal_value_t *_kv; \ + if (OPAL_SUCCESS != ((r) = opal_pmix.get((p), (s), &(_kv)))) { \ *(d) = NULL; \ } else { \ - (r) = opal_value_unload(kv, (void**)(d), (t)); \ - OBJ_RELEASE(kv); \ + (r) = opal_value_unload(_kv, (void**)(d), (t)); \ + OBJ_RELEASE(_kv); \ } \ } while(0); @@ -224,8 +133,8 @@ typedef void (*opal_pmix_cbfunc_t)(int status, opal_value_t *kv, void *cbdata); * * r - the integer return status from the modex op (int) * s - string key (char*) - * p - pointer to the opal_proc_t of the proc that posted - * the data (opal_proc_t*) + * p - pointer to the opal_process_name_t 
of the proc that posted + * the data (opal_process_name_t*) * d - pointer to a location wherein the data object * it to be returned (char**) * sz - pointer to a location wherein the number of bytes @@ -233,14 +142,13 @@ typedef void (*opal_pmix_cbfunc_t)(int status, opal_value_t *kv, void *cbdata); */ #define OPAL_MODEX_RECV_STRING(r, s, p, d, sz) \ do { \ - opal_value_t *kv; \ - if (OPAL_SUCCESS == ((r) = opal_pmix.get(&(p)->proc_name, \ - (s), &kv)) && \ - NULL != kv) { \ - *(d) = kv->data.bo.bytes; \ - *(sz) = kv->data.bo.size; \ - kv->data.bo.bytes = NULL; /* protect the data */ \ - OBJ_RELEASE(kv); \ + opal_value_t *_kv; \ + if (OPAL_SUCCESS == ((r) = opal_pmix.get((p), (s), &(_kv))) && \ + NULL != _kv) { \ + *(d) = _kv->data.bo.bytes; \ + *(sz) = _kv->data.bo.size; \ + _kv->data.bo.bytes = NULL; /* protect the data */ \ + OBJ_RELEASE(_kv); \ } else { \ *(d) = NULL; \ *(sz) = 0; \ @@ -253,8 +161,8 @@ typedef void (*opal_pmix_cbfunc_t)(int status, opal_value_t *kv, void *cbdata); * * r - the integer return status from the modex op (int) * s - the MCA component that posted the data (mca_base_component_t*) - * p - pointer to the opal_proc_t of the proc that posted - * the data (opal_proc_t*) + * p - pointer to the opal_process_name_t of the proc that posted + * the data (opal_process_name_t*) * d - pointer to a location wherein the data object * it to be returned (char**) * sz - pointer to a location wherein the number of bytes @@ -262,14 +170,14 @@ typedef void (*opal_pmix_cbfunc_t)(int status, opal_value_t *kv, void *cbdata); */ #define OPAL_MODEX_RECV(r, s, p, d, sz) \ do { \ - char *key; \ - key = mca_base_component_to_string((s)); \ - if (NULL == key) { \ + char *_key; \ + _key = mca_base_component_to_string((s)); \ + if (NULL == _key) { \ OPAL_ERROR_LOG(OPAL_ERR_OUT_OF_RESOURCE); \ (r) = OPAL_ERR_OUT_OF_RESOURCE; \ } else { \ - OPAL_MODEX_RECV_STRING((r), key, (p), (d), (sz)); \ - free(key); \ + OPAL_MODEX_RECV_STRING((r), _key, (p), (d), (sz)); \ + 
free(_key); \ } \ } while(0); @@ -279,148 +187,496 @@ typedef void (*opal_pmix_cbfunc_t)(int status, opal_value_t *kv, void *cbdata); * that takes into account directives and availability of * non-blocking operations */ -#define OPAL_FENCE(p, s, cf, cd) \ - opal_pmix.fence((p), (s)); +#define OPAL_MODEX(p, s) \ + do { \ + opal_pmix.commit(); \ + opal_pmix.fence((p), (s)); \ + } while(0); -/**** DEFINE THE PUBLIC API'S **** - **** NOTE THAT WE DO NOT HAVE A 1:1 MAPPING OF APIs **** - **** HERE TO THOSE CURRENTLY DEFINED BY PMI AS WE **** - **** DON'T USE SOME OF THOSE FUNCTIONS AND THIS ISN'T **** - **** A GENERAL LIBRARY ****/ +/* callback handler for errors */ +typedef void (*opal_pmix_errhandler_fn_t)(int error); -/***** APIs CURRENTLY USED IN THE OMPI/ORTE CODE BASE ****/ /* NOTE: calls to these APIs must be thread-protected as there * currently is NO internal thread safety. */ -/* Init */ + +/************************************************************ + * CLIENT APIs * + ************************************************************/ + +/* Initialize the PMIx client + * When called the client will check for the required connection + * information of the local server and will establish the connection. + * If the information is not found, or the server connection fails, then + * an appropriate error constant will be returned. + */ typedef int (*opal_pmix_base_module_init_fn_t)(void); -/* Finalize */ +/* Finalize the PMIx client, closing the connection to the local server. + * An error code will be returned if, for some reason, the connection + * cannot be closed. */ typedef int (*opal_pmix_base_module_fini_fn_t)(void); -/* Initialized */ -typedef bool (*opal_pmix_base_module_initialized_fn_t)(void); +/* Returns _true_ if the PMIx client has been successfully initialized, + * returns _false_ otherwise. 
Note that the function only reports the + * internal state of the PMIx client - it does not verify an active + * connection with the server, nor that the server is functional. */ +typedef int (*opal_pmix_base_module_initialized_fn_t)(void); -/* Abort */ -typedef int (*opal_pmix_base_module_abort_fn_t)(int flag, const char msg[]); +/* Request that the provided list of opal_namelist_t procs be aborted, returning the + * provided _status_ and printing the provided message. A _NULL_ + * for the proc list indicates that all processes in the caller's + * nspace are to be aborted. + * + * The response to this request is somewhat dependent on the specific resource + * manager and its configuration (e.g., some resource managers will + * not abort the application if the provided _status_ is zero unless + * specifically configured to do so), and thus lies outside the control + * of PMIx itself. However, the client will inform the RM of + * the request that the application be aborted, regardless of the + * value of the provided _status_. + * + * Passing a _NULL_ msg parameter is allowed. Note that race conditions + * caused by multiple processes calling PMIx_Abort are left to the + * server implementation to resolve with regard to which status is + * returned and what messages (if any) are printed. + */ +typedef int (*opal_pmix_base_module_abort_fn_t)(int status, const char *msg, + opal_list_t *procs); -/* Fence - note that this call is required to commit any - * data "put" to the system since the last call to "fence" - * prior to (or as part of) executing the barrier. Serves both PMI2 - * and PMI1 "barrier" purposes */ -typedef int (*opal_pmix_base_module_fence_fn_t)(opal_process_name_t *procs, size_t nprocs); +/* Push all previously _PMIx_Put_ values to the local PMIx server. 
+ * This is an asynchronous operation - the library will immediately + * return to the caller while the data is transmitted to the local + * server in the background */ +typedef int (*opal_pmix_base_module_commit_fn_t)(void); -/* Fence_nb - not included in the current PMI standard. This is a non-blocking - * version of the standard "fence" call. All subsequent "get" calls will block - * pending completion of this operation. Non-blocking "get" calls will still - * complete as data becomes available */ -typedef int (*opal_pmix_base_module_fence_nb_fn_t)(opal_process_name_t *procs, size_t nprocs, - opal_pmix_cbfunc_t cbfunc, void *cbdata); +/* Execute a blocking barrier across the processes identified in the + * specified list of opal_namelist_t. Passing a _NULL_ pointer + * indicates that the barrier is to span all processes in the client's + * namespace. Each provided opal_namelist_t can pass PMIX_RANK_WILDCARD to + * indicate that all processes in the given jobid are + * participating. + * + * The _collect_data_ parameter is passed to the server to indicate whether + * or not the barrier operation is to return the _put_ data from all + * participating processes. A value of _false_ indicates that the callback + * is just used as a release and no data is to be returned at that time. A + * value of _true_ indicates that all _put_ data is to be collected by the + * barrier. Returned data is locally cached so that subsequent calls to _PMIx_Get_ + * can be serviced without communicating to/from the server, but at the cost + * of increased memory footprint + */ +typedef int (*opal_pmix_base_module_fence_fn_t)(opal_list_t *procs, int collect_data); -/* Put - note that this API has been modified from the current PMI standard to - * reflect the proposed PMIx extensions. */ +/* Fence_nb */ +/* Non-blocking version of PMIx_Fence. Note that the function will return + * an error if a _NULL_ callback function is given. 
*/ +typedef int (*opal_pmix_base_module_fence_nb_fn_t)(opal_list_t *procs, int collect_data, + opal_pmix_op_cbfunc_t cbfunc, void *cbdata); + +/* Push a value into the client's namespace. The client library will cache + * the information locally until _PMIx_Commit_ is called. The provided scope + * value is passed to the local PMIx server, which will distribute the data + * as directed. */ typedef int (*opal_pmix_base_module_put_fn_t)(opal_pmix_scope_t scope, - opal_value_t *kv); + opal_value_t *val); -/* Get - note that this API has been modified from the current PMI standard to - * reflect the proposed PMIx extensions, and to include the process identifier so - * we can form the PMI key within the active component instead of sprinkling that - * code all over the code base. */ -typedef int (*opal_pmix_base_module_get_fn_t)(const opal_process_name_t *id, +/* Retrieve information for the specified _key_ as published by the rank + * and jobid in the provided opal_process_name_t, returning a pointer to the value in the + * given address. + * + * This is a blocking operation - the caller will block until + * the specified data has been _PMIx_Put_ by the specified rank. The caller is + * responsible for freeing all memory associated with the returned value when + * no longer required. */ +typedef int (*opal_pmix_base_module_get_fn_t)(const opal_process_name_t *proc, const char *key, - opal_value_t **kv); + opal_value_t **val); -/* Get_nb - not included in the current PMI standard. This is a non-blocking - * version of the standard "get" call. Retrieved value will be provided as - * opal_value_t object in the callback. We include the process identifier so - * we can form the PMI key within the active component instead of sprinkling that - * code all over the code base. */ -typedef void (*opal_pmix_base_module_get_nb_fn_t)(const opal_process_name_t *id, +/* Retrieve information for the specified _key_ as published by the given rank + * and jobid in the opal_process_name_t.
This is a non-blocking operation - the + * callback function will be executed once the specified data has been _PMIx_Put_ + * by the specified proc and retrieved by the local server. */ +typedef int (*opal_pmix_base_module_get_nb_fn_t)(const opal_process_name_t *proc, const char *key, - opal_pmix_cbfunc_t cbfunc, - void *cbdata); + opal_pmix_value_cbfunc_t cbfunc, void *cbdata); -/* Publish - the "info" parameter - * consists of a list of pmix_info_t objects */ -typedef int (*opal_pmix_base_module_publish_fn_t)(const char service_name[], - opal_list_t *info, - const char port[]); +/* Publish the given data to the "universal" nspace + * for lookup by others subject to the provided scope. + * Note that the keys must be unique within the specified + * scope or else an error will be returned (first published + * wins). Attempts to access the data by procs outside of + * the provided scope will be rejected. + * + * Note: Some host environments may support user/group level + * access controls on the information in addition to the scope. + * These can be specified in the info array using the appropriately + * defined keys. + * + * The persistence parameter instructs the server as to how long + * the data is to be retained, within the context of the scope. + * For example, data published within _PMIX_NAMESPACE_ will be + * deleted along with the namespace regardless of the persistence. + * However, data published within PMIX_USER would be retained if + * the persistence was set to _PMIX_PERSIST_SESSION_ until the + * allocation terminates. + * + * The blocking form will block until the server confirms that the + * data has been posted and is available. 
The non-blocking form will + * return immediately, executing the callback when the server confirms + * availability of the data */ +typedef int (*opal_pmix_base_module_publish_fn_t)(opal_pmix_data_range_t scope, + opal_pmix_persistence_t persist, + opal_list_t *info); +typedef int (*opal_pmix_base_module_publish_nb_fn_t)(opal_pmix_data_range_t scope, + opal_pmix_persistence_t persist, + opal_list_t *info, + opal_pmix_op_cbfunc_t cbfunc, void *cbdata); -/* Lookup - the "info" parameter - * consists of a list of pmix_info_t objects */ -typedef int (*opal_pmix_base_module_lookup_fn_t)(const char service_name[], - opal_list_t *info, - char port[], int portLen); +/* Lookup information published by another process within the + * specified scope. A scope of _PMIX_SCOPE_UNDEF_ requests that + * the search be conducted across _all_ namespaces. The "data" + * parameter consists of an array of pmix_pdata_t struct with the + * keys specifying the requested information. Data will be returned + * for each key in the associated info struct - any key that cannot + * be found will return with a data type of "PMIX_UNDEF". The function + * will return SUCCESS if _any_ values can be found, so the caller + * must check each data element to ensure it was returned. + * + * The proc field in each pmix_pdata_t struct will contain the + * nspace/rank of the process that published the data. + * + * Note: although this is a blocking function, it will _not_ wait + * for the requested data to be published. Instead, it will block + * for the time required by the server to lookup its current data + * and return any found items. 
Thus, the caller is responsible for + * ensuring that data is published prior to executing a lookup, or + * for retrying until the requested data is found */ +typedef int (*opal_pmix_base_module_lookup_fn_t)(opal_pmix_data_range_t scope, + opal_list_t *data); -/* Unpublish - the "info" parameter - * consists of a list of pmix_info_t objects */ -typedef int (*opal_pmix_base_module_unpublish_fn_t)(const char service_name[], - opal_list_t *info); +/* Non-blocking form of the _PMIx_Lookup_ function. Data for + * the provided NULL-terminated keys array will be returned + * in the provided callback function. The _wait_ parameter + * is used to indicate if the caller wishes the callback to + * wait for _all_ requested data before executing the callback + * (_true_), or to callback once the server returns whatever + * data is immediately available (_false_) */ +typedef int (*opal_pmix_base_module_lookup_nb_fn_t)(opal_pmix_data_range_t scope, int wait, char **keys, + opal_pmix_lookup_cbfunc_t cbfunc, void *cbdata); -/* Get attribute - * Query the server for the specified attribute, returning it in the - * provided opal_value_t. The function will return "true" if the attribute - * is found, and "false" if not. - * Attributes are provided by the PMIx server, so there is no corresponding - * "put" function. */ -typedef bool (*opal_pmix_base_module_get_attr_fn_t)(const char *attr, opal_value_t **kv); +/* Unpublish data posted by this process using the given keys + * within the specified scope. The function will block until + * the data has been removed by the server. A value of _NULL_ + * for the keys parameter instructs the server to remove + * _all_ data published by this process within the given scope */ +typedef int (*opal_pmix_base_module_unpublish_fn_t)(opal_pmix_data_range_t scope, char **keys); -/* Get attribute (non-blocking) - * Query the server for the specified attribute.. 
- * Attributes are provided by the PMIx server, so there is no corresponding "put" - * function. The call will be executed as non-blocking, returning immediately, - * with data resulting from the call returned in the callback function. A returned - * NULL opal_value_t* indicates that the attribute was not found. The returned - * pointer is "owned" by the PMIx module and must not be released by the - * callback function */ -typedef int (*opal_pmix_base_module_get_attr_nb_fn_t)(const char *attr, - opal_pmix_cbfunc_t cbfunc, - void *cbdata); +/* Non-blocking form of the _PMIx_Unpublish_ function. The + * callback function will be executed once the server confirms + * removal of the specified data. A value of _NULL_ + * for the keys parameter instructs the server to remove + * _all_ data published by this process within the given scope */ +typedef int (*opal_pmix_base_module_unpublish_nb_fn_t)(opal_pmix_data_range_t scope, char **keys, + opal_pmix_op_cbfunc_t cbfunc, void *cbdata); + +/* Spawn a new job. The spawned applications are automatically + * connected to the calling process, and their assigned namespace + * is returned in the nspace parameter - a _NULL_ value in that + * location indicates that the caller doesn't wish to have the + * namespace returned. Behavior of individual resource managers + * may differ, but it is expected that failure of any application + * process to start will result in termination/cleanup of _all_ + * processes in the newly spawned job and return of an error + * code to the caller */ +typedef int (*opal_pmix_base_module_spawn_fn_t)(opal_list_t *job_info, + opal_list_t *apps, + opal_jobid_t *jobid); + +/* Non-blocking form of the _PMIx_Spawn_ function. The callback + * will be executed upon launch of the specified applications, + * or upon failure to launch any of them. 
*/ +typedef int (*opal_pmix_base_module_spawn_nb_fn_t)(opal_list_t *job_info, + opal_list_t *apps, + opal_pmix_spawn_cbfunc_t cbfunc, + void *cbdata); + +/* Record the specified processes as "connected". Both blocking and non-blocking + * versions are provided. This means that the resource manager should treat the + * failure of any process in the specified group as a reportable event, and take + * appropriate action. Note that different resource managers may respond to + * failures in different manners. + * + * The list is to be provided as opal_namelist_t objects + * + * The callback function is to be called once all participating processes have + * called connect. The server is required to return any job-level info for the + * connecting processes that they might not already have - i.e., if the connect + * request involves procs from different nspaces, then each proc shall receive + * the job-level info from those nspaces other than their own. + * + * Note: a process can only engage in _one_ connect operation involving the identical + * set of ranges at a time. However, a process _can_ be simultaneously engaged + * in multiple connect operations, each involving a different set of ranges */ +typedef int (*opal_pmix_base_module_connect_fn_t)(opal_list_t *procs); + +typedef int (*opal_pmix_base_module_connect_nb_fn_t)(opal_list_t *procs, + opal_pmix_op_cbfunc_t cbfunc, + void *cbdata); + +/* Disconnect a previously connected set of processes. An error will be returned + * if the specified set of procs was not previously "connected". As above, a process + * may be involved in multiple simultaneous disconnect operations. However, a process + * is not allowed to reconnect to a set of procs that has not fully completed + * disconnect - i.e., you have to fully disconnect before you can reconnect to the + * _same_ group of processes.
*/ +typedef int (*opal_pmix_base_module_disconnect_fn_t)(opal_list_t *procs); + +typedef int (*opal_pmix_base_module_disconnect_nb_fn_t)(opal_list_t *procs, + opal_pmix_op_cbfunc_t cbfunc, + void *cbdata); + +/* Given a node name, return an array of processes within the specified jobid + * on that node. If the jobid is OPAL_JOBID_WILDCARD, then all processes on the node will + * be returned. If the specified node does not currently host any processes, + * then the returned list will be empty. + */ +typedef int (*opal_pmix_base_module_resolve_peers_fn_t)(const char *nodename, + opal_jobid_t jobid, + opal_list_t *procs); -/**** APIs NOT CURRENTLY USED IN THE OMPI/ORTE CODE BASE, BUT THAT **** - **** MAY BE IMPLEMENTED IN THE NEAR FUTURE. COMPONENTS ARE FREE TO **** - **** JUST HAVE THEM RETURN "OPAL_ERR_NOT_IMPLEMENTED" ****/ +/* Given a jobid, return the list of nodes hosting processes within + * that jobid. The returned string will contain a comma-delimited list + * of nodenames. The caller is responsible for releasing the string + * when done with it */ +typedef int (*opal_pmix_base_module_resolve_nodes_fn_t)(opal_jobid_t jobid, char **nodelist); -/* PMI2_Job_Spawn */ -typedef int (*opal_pmix_base_module_spawn_fn_t)(int count, const char * cmds[], - int argcs[], const char ** argvs[], - const int maxprocs[], - opal_list_t *info_keyval_vector, - opal_list_t *preput_keyval_vector, - char jobId[], int jobIdSize, - int errors[]); -/* PMI2_Job_Connect */ -typedef int (*opal_pmix_base_module_job_connect_fn_t)(const char jobId[]); +/************************************************************ + * SERVER APIs * + ************************************************************/ -/* PMI2_Job_Disconnect */ -typedef int (*opal_pmix_base_module_job_disconnect_fn_t)(const char jobId[]); +/* Initialize the server support library */ +typedef int (*opal_pmix_base_module_server_init_fn_t)(opal_pmix_server_module_t *module); +/* Finalize the server support library */ +typedef int 
(*opal_pmix_base_module_server_finalize_fn_t)(void); + +/* given a semicolon-separated list of input values, generate + * a regex that can be passed down to the client for parsing. + * The caller is responsible for free'ing the resulting + * string + * + * If values have leading zero's, then that is preserved. You + * have to add back any prefix/suffix for node names + * odin[009-015,017-023,076-086] + * + * "pmix:odin[009-015,017-023,076-086]" + * + * Note that the "pmix" at the beginning of each regex indicates + * that the PMIx native parser is to be used by the client for + * parsing the provided regex. Other parsers may be supported - see + * the pmix_client.h header for a list. + */ +typedef int (*opal_pmix_base_module_generate_regex_fn_t)(const char *input, char **regex); + +/* The input is expected to consist of a comma-separated list + * of ranges. Thus, an input of: + * "1-4;2-5;8,10,11,12;6,7,9" + * would generate a regex of + * "[pmix:2x(3);8,10-12;6-7,9]" + * + * Note that the "pmix" at the beginning of each regex indicates + * that the PMIx native parser is to be used by the client for + * parsing the provided regex. Other parsers may be supported - see + * the pmix_client.h header for a list. + */ +typedef int (*opal_pmix_base_module_generate_ppn_fn_t)(const char *input, char **ppn); + +/* Setup the data about a particular nspace so it can + * be passed to any child process upon startup. The PMIx + * connection procedure provides an opportunity for the + * host PMIx server to pass job-related info down to a + * child process. This might include the number of + * processes in the job, relative local ranks of the + * processes within the job, and other information of + * use to the process. The server is free to determine + * which, if any, of the supported elements it will + * provide - defined values are provided in pmix_common.h. 
+ * + * NOTE: the server must register ALL nspaces that will + * participate in collective operations with local processes. + * This means that the server must register an nspace even + * if it will not host any local procs from within that + * nspace IF any local proc might at some point perform + * a collective operation involving one or more procs from + * that nspace. This is necessary so that the collective + * operation can know when it is locally complete. + * + * The caller must also provide the number of local procs + * that will be launched within this nspace. This is required + * for the PMIx server library to correctly handle collectives + * as a collective operation call can occur before all the + * procs have been started */ +typedef int (*opal_pmix_base_module_server_register_nspace_fn_t)(opal_jobid_t jobid, + int nlocalprocs, + opal_list_t *info, + opal_pmix_op_cbfunc_t cbfunc, + void *cbdata); + +/* Register a client process with the PMIx server library. The + * expected user ID and group ID of the child process helps the + * server library to properly authenticate clients as they connect + * by requiring the two values to match. + * + * The host server can also, if it desires, provide an object + * it wishes to be returned when a server function is called + * that relates to a specific process. For example, the host + * server may have an object that tracks the specific client. + * Passing the object to the library allows the library to + * return that object when the client calls "finalize", thus + * allowing the host server to access the object without + * performing a lookup. */ +typedef int (*opal_pmix_base_module_server_register_client_fn_t)(const opal_process_name_t *proc, + uid_t uid, gid_t gid, + void *server_object, + opal_pmix_op_cbfunc_t cbfunc, + void *cbdata); + +/* Setup the environment of a child process to be forked + * by the host so it can correctly interact with the PMIx + * server. 
The PMIx client needs some setup information + * so it can properly connect back to the server. This function + * will set appropriate environmental variables for this purpose. */ +typedef int (*opal_pmix_base_module_server_setup_fork_fn_t)(const opal_process_name_t *proc, char ***env); + +/* Define a function by which the host server can request modex data + * from the local PMIx server. This is used to support the direct modex + * operation - i.e., where data is cached locally on each PMIx + * server for its own local clients, and is obtained on-demand + * for remote requests. Upon receiving a request from a remote + * server, the host server will call this function to pass the + * request into the PMIx server. The PMIx server will return a blob + * (once it becomes available) via the cbfunc - the host + * server shall send the blob back to the original requestor */ +typedef int (*opal_pmix_base_module_server_dmodex_request_fn_t)(const opal_process_name_t *proc, + opal_pmix_modex_cbfunc_t cbfunc, + void *cbdata); + +/* Report an error to a process for notification via any + * registered errhandler. The errhandler registration can be + * called by both the server and the client application. On the + * server side, the errhandler is used to report errors detected + * by PMIx to the host server for handling. On the client side, + * the errhandler is used to notify the process of errors + * reported by the server - e.g., the failure of another process. + * + * This function allows the host server to direct the server + * convenience library to notify all indicated local procs of + * an error. The error can be local, or anywhere in the cluster. + * The status indicates the error being reported. + * + * The first array of procs informs the server library as to which + * processes should be alerted - e.g., the processes that are in + * a directly-affected job or are connected to one that is affected. 
+ * Passing a NULL for this array will indicate that all local procs + * are to be notified. + * + * The second array identifies the processes that will be impacted + * by the error. This could consist of a single process, or a number + * of processes. + * + * The info array contains any further info the RM can and/or chooses + * to provide. + * + * If the payload and size parameters are non-NULL, then the function + * will assume that the caller intends to send the message itself. In + * this situation, the convenience library will simply pack the message + * for transmission, and return the payload and size in the provided + * variables (external comm should have been indicated during server_init). + * The caller will be responsible for thread protection. + * + * Otherwise, the convenience library will transmit the message to + * the identified target processes, and the function call will be + * internally thread protected. + * + * The callback function will be called upon completion of the + * notify_error function's actions. Note that any messages will + * have been queued, but may not have been transmitted by this + * time. Note that the caller is required to maintain the input + * data until the callback function has been executed! 
*/ +typedef int (*opal_pmix_base_module_server_notify_error_fn_t)(int status, + opal_list_t *procs, + opal_list_t *error_procs, + opal_list_t *info, + opal_pmix_op_cbfunc_t cbfunc, void *cbdata); + + +/************************************************************ + * UTILITY APIs * + ************************************************************/ + +/* get the version of the embedded library */ +typedef const char* (*opal_pmix_base_module_get_version_fn_t)(void); + +/* register an errhandler to report loss of connection to the server */ +typedef void (*opal_pmix_base_module_register_fn_t)(opal_pmix_errhandler_fn_t errhandler); + +/* deregister the errhandler */ +typedef void (*opal_pmix_base_module_deregister_fn_t)(void); + +/* store data internally, but don't push it out to be shared - this is + * intended solely for storage of info on other procs that comes thru + * a non-PMIx channel (e.g., may be computed locally) but is desired + * to be available via a PMIx_Get call */ +typedef int (*opal_pmix_base_module_store_fn_t)(const opal_process_name_t *proc, + opal_value_t *val); /* * the standard public API data structure */ typedef struct { - /* currently used APIs */ + /* client APIs */ opal_pmix_base_module_init_fn_t init; opal_pmix_base_module_fini_fn_t finalize; opal_pmix_base_module_initialized_fn_t initialized; opal_pmix_base_module_abort_fn_t abort; + opal_pmix_base_module_commit_fn_t commit; opal_pmix_base_module_fence_fn_t fence; opal_pmix_base_module_fence_nb_fn_t fence_nb; opal_pmix_base_module_put_fn_t put; opal_pmix_base_module_get_fn_t get; opal_pmix_base_module_get_nb_fn_t get_nb; opal_pmix_base_module_publish_fn_t publish; + opal_pmix_base_module_publish_nb_fn_t publish_nb; opal_pmix_base_module_lookup_fn_t lookup; + opal_pmix_base_module_lookup_nb_fn_t lookup_nb; opal_pmix_base_module_unpublish_fn_t unpublish; - opal_pmix_base_module_get_attr_fn_t get_attr; - opal_pmix_base_module_get_attr_nb_fn_t get_attr_nb; - /* currently unused APIs */ + 
opal_pmix_base_module_unpublish_nb_fn_t unpublish_nb; opal_pmix_base_module_spawn_fn_t spawn; - opal_pmix_base_module_job_connect_fn_t job_connect; - opal_pmix_base_module_job_disconnect_fn_t job_disconnect; + opal_pmix_base_module_spawn_nb_fn_t spawn_nb; + opal_pmix_base_module_connect_fn_t connect; + opal_pmix_base_module_connect_nb_fn_t connect_nb; + opal_pmix_base_module_disconnect_fn_t disconnect; + opal_pmix_base_module_disconnect_nb_fn_t disconnect_nb; + opal_pmix_base_module_resolve_peers_fn_t resolve_peers; + opal_pmix_base_module_resolve_nodes_fn_t resolve_nodes; + /* server APIs */ + opal_pmix_base_module_server_init_fn_t server_init; + opal_pmix_base_module_server_finalize_fn_t server_finalize; + opal_pmix_base_module_generate_regex_fn_t generate_regex; + opal_pmix_base_module_generate_ppn_fn_t generate_ppn; + opal_pmix_base_module_server_register_nspace_fn_t server_register_nspace; + opal_pmix_base_module_server_register_client_fn_t server_register_client; + opal_pmix_base_module_server_setup_fork_fn_t server_setup_fork; + opal_pmix_base_module_server_dmodex_request_fn_t server_dmodex_request; + opal_pmix_base_module_server_notify_error_fn_t server_notify_error; + /* Utility APIs */ + opal_pmix_base_module_get_version_fn_t get_version; + opal_pmix_base_module_register_fn_t register_errhandler; + opal_pmix_base_module_deregister_fn_t deregister_errhandler; + opal_pmix_base_module_store_fn_t store_local; } opal_pmix_base_module_t; typedef struct { @@ -438,9 +694,6 @@ typedef struct { /* Global structure for accessing store functions */ OPAL_DECLSPEC extern opal_pmix_base_module_t opal_pmix; /* holds base function pointers */ -/* flag to indicate collective vs direct fence operations */ -OPAL_DECLSPEC extern bool opal_pmix_use_collective; - END_C_DECLS #endif diff --git a/opal/mca/pmix/pmix1xx/Makefile.am b/opal/mca/pmix/pmix1xx/Makefile.am new file mode 100644 index 0000000000..d57064b2b9 --- /dev/null +++ b/opal/mca/pmix/pmix1xx/Makefile.am @@ -0,0 
+1,49 @@ +# +# Copyright (c) 2014-2015 Intel, Inc. All rights reserved. +# Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +EXTRA_DIST = autogen.subdirs + +SUBDIRS = pmix + +sources = \ + pmix1.h \ + pmix_pmix1_component.c \ + pmix_pmix1.c \ + pmix1_client.c \ + pmix1_server_south.c \ + pmix1_server_north.c + +# Make the output library in this directory, and name it either +# mca__.la (for DSO builds) or libmca__.la +# (for static builds). + +if MCA_BUILD_opal_pmix_pmix1xx_DSO +component_noinst = +component_install = mca_pmix_pmix1xx.la +else +component_noinst = libmca_pmix_pmix1xx.la +component_install = +endif + +mcacomponentdir = $(opallibdir) +mcacomponent_LTLIBRARIES = $(component_install) +mca_pmix_pmix1xx_la_SOURCES = $(sources) +mca_pmix_pmix1xx_la_CFLAGS = $(opal_pmix_pmix1xx_CFLAGS) +mca_pmix_pmix1xx_la_CPPFLAGS = \ + -I$(srcdir)/pmix/include $(opal_pmix_pmix1xx_CPPFLAGS) +mca_pmix_pmix1xx_la_LDFLAGS = -module -avoid-version $(opal_pmix_pmix1xx_LDFLAGS) +mca_pmix_pmix1xx_la_LIBADD = $(opal_pmix_pmix1xx_LIBS) + +noinst_LTLIBRARIES = $(component_noinst) +libmca_pmix_pmix1xx_la_SOURCES =$(sources) +libmca_pmix_pmix1xx_la_CFLAGS = $(opal_pmix_pmix1xx_CFLAGS) +libmca_pmix_pmix1xx_la_CPPFLAGS = $(opal_pmix_pmix1xx_CPPFLAGS) +libmca_pmix_pmix1xx_la_LDFLAGS = -module -avoid-version $(opal_pmix_pmix1xx_LDFLAGS) +libmca_pmix_pmix1xx_la_LIBADD = $(opal_pmix_pmix1xx_LIBS) diff --git a/opal/mca/pmix/pmix1xx/autogen.subdirs b/opal/mca/pmix/pmix1xx/autogen.subdirs new file mode 100644 index 0000000000..f4fd6e846e --- /dev/null +++ b/opal/mca/pmix/pmix1xx/autogen.subdirs @@ -0,0 +1 @@ +pmix diff --git a/opal/mca/pmix/pmix1xx/configure.m4 b/opal/mca/pmix/pmix1xx/configure.m4 new file mode 100644 index 0000000000..9c80ffd7df --- /dev/null +++ b/opal/mca/pmix/pmix1xx/configure.m4 @@ -0,0 +1,82 @@ +# -*- shell-script -*- +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and 
Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2011-2013 Los Alamos National Security, LLC. +# All rights reserved. +# Copyright (c) 2010-2015 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2013-2015 Intel, Inc. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# MCA_opal_pmix_pmix1xx_CONFIG([action-if-found], [action-if-not-found]) +# ----------------------------------------------------------- +AC_DEFUN([MCA_opal_pmix_pmix1xx_CONFIG],[ + AC_CONFIG_FILES([opal/mca/pmix/pmix1xx/Makefile]) + + OPAL_VAR_SCOPE_PUSH([PMIX_VERSION opal_pmix_pmix1xx_save_CPPFLAGS opal_pmix_pmix1xx_save_LDFLAGS opal_pmix_pmix1xx_save_LIBS opal_pmix_pmix1xx_basedir opal_pmix_pmix1xx_save_CFLAGS]) + + PMIX_VERSION= + opal_pmix_pmix1xx_basedir=opal/mca/pmix/pmix1xx + + opal_pmix_pmix1xx_save_CFLAGS=$CFLAGS + opal_pmix_pmix1xx_save_CPPFLAGS=$CPPFLAGS + opal_pmix_pmix1xx_save_LDFLAGS=$LDFLAGS + opal_pmix_pmix1xx_save_LIBS=$LIBS + + opal_pmix_pmix1xx_args="--enable-embedded-mode --with-pmix-symbol-prefix=opal_pmix_pmix1xx_ --with-libevent-header=\\\"opal/mca/event/$opal_event_base_include\\\" --with-hwloc-header=\\\"$opal_hwloc_base_include\\\"" + if test "$enable_debug" = "yes"; then + opal_pmix_pmix1xx_args="--enable-debug $opal_pmix_pmix1xx_args" + CFLAGS="$OPAL_CFLAGS_BEFORE_PICKY $OPAL_VISIBILITY_CFLAGS -g" + else + CFLAGS="$OPAL_CFLAGS_BEFORE_PICKY $OPAL_VISIBILITY_CFLAGS" + fi + CPPFLAGS="-I$OPAL_TOP_SRCDIR -I$OPAL_TOP_BUILDDIR -I$OPAL_TOP_SRCDIR/opal/include -I$OPAL_TOP_BUILDDIR/opal/include $CPPFLAGS" + export CFLAGS + export CPPFLAGS +
+ OPAL_CONFIG_SUBDIR([$opal_pmix_pmix1xx_basedir/pmix], + [$opal_pmix_pmix1xx_args $opal_subdir_args], + [opal_pmix_pmix1xx_happy=1], [opal_pmix_pmix1xx_happy=0]) + + if test $opal_pmix_pmix1xx_happy -eq 1; then + PMIX_VERSION="internal v`$srcdir/$opal_pmix_pmix1xx_basedir/pmix/config/pmix_get_version.sh $srcdir/$opal_pmix_pmix1xx_basedir/pmix/VERSION`" + # Build flags for our Makefile.am + opal_pmix_pmix1xx_LIBS='$(OPAL_TOP_BUILDDIR)/'"$opal_pmix_pmix1xx_basedir"'/pmix/libpmix.la' + opal_pmix_pmix1xx_CPPFLAGS='-I$(OPAL_TOP_BUILDDIR)/opal/mca/pmix/pmix1xx/pmix/include/pmix -I$(OPAL_TOP_BUILDDIR)/opal/mca/pmix/pmix1xx/pmix/include -I$(OPAL_TOP_BUILDDIR)/opal/mca/pmix/pmix1xx/pmix -I$(OPAL_TOP_SRCDIR)/opal/mca/pmix/pmix1xx/pmix' + AC_SUBST([opal_pmix_pmix1xx_LIBS]) + AC_SUBST([opal_pmix_pmix1xx_CPPFLAGS]) + fi + + AC_DEFINE_UNQUOTED([PMIX_PMIX1XX_PMIX_VERSION], + ["$PMIX_VERSION"], + [Version of PMIx]) + + # Finally, add some flags to the wrapper compiler if we're + # building with developer headers so that our headers can + # be found. + pmix1xx_WRAPPER_EXTRA_CPPFLAGS='-I${includedir}/openmpi/'"$opal_pmix_pmix1xx_basedir"'/pmix -I${includedir}/openmpi/'"$opal_pmix_pmix1xx_basedir"'/pmix/include' + + CFLAGS=$opal_pmix_pmix1xx_save_CFLAGS + CPPFLAGS=$opal_pmix_pmix1xx_save_CPPFLAGS + LDFLAGS=$opal_pmix_pmix1xx_save_LDFLAGS + LIBS=$opal_pmix_pmix1xx_save_LIBS + + AS_IF([test $opal_pmix_pmix1xx_happy -eq 1], + [$1], + [$2]) + + OPAL_VAR_SCOPE_POP +])dnl diff --git a/opal/mca/pmix/pmix1xx/pmix/INSTALL b/opal/mca/pmix/pmix1xx/pmix/INSTALL new file mode 100644 index 0000000000..74db17e5ff --- /dev/null +++ b/opal/mca/pmix/pmix1xx/pmix/INSTALL @@ -0,0 +1,88 @@ +Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + University Research and Technology + Corporation. All rights reserved. +Copyright (c) 2004-2005 The University of Tennessee and The University + of Tennessee Research Foundation. All rights + reserved.
+Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + University of Stuttgart. All rights reserved. +Copyright (c) 2004-2005 The Regents of the University of California. + All rights reserved. +Copyright (c) 2008-2015 Cisco Systems, Inc. All rights reserved. +Copyright (c) 2013 Intel, Inc. All rights reserved. +$COPYRIGHT$ + +Additional copyrights may follow + +$HEADER$ + + +For More Information +==================== + +This file is a *very* short overview of building and installing +the PMIx library. Much more information is available on the +PMIx web site (e.g., see the FAQ section): + + http://www.open-mpi.org/projects/pmix + + +Developer Builds +================ + +If you have checked out a DEVELOPER'S COPY of PMIx (i.e., you checked +out from Git), you should read the HACKING file before attempting to +build PMIx. You must then run: + +shell$ ./autogen.pl + +You will need very recent versions of GNU Autoconf, Automake, and +Libtool. If autogen.pl fails, read the HACKING file. If anything +else fails, read the HACKING file. Finally, we suggest reading the +HACKING file. + +*** NOTE: Developer's copies of PMIx typically include a large +performance penalty at run-time because of extra debugging overhead. + + +User Builds +=========== + +Building PMIx is typically a combination of running "configure" +and "make". Execute the following commands to install the PMIx +system from within the directory at the top of the tree: + +shell$ ./configure --prefix=/where/to/install +[...lots of output...] +shell$ make all install + +If you need special access to install, then you can execute "make +all" as a user with write permissions in the build tree, and a +separate "make install" as a user with write permissions to the +install tree. + +Compiling support for various networks or other specific hardware may +require additional command line flags when running configure. See the +README file for more details. Note that VPATH builds are fully +supported.
For example: + +shell$ gtar zxf pmix-X.Y.Z.tar.gz +shell$ cd pmix-X.Y.Z +shell$ mkdir build +shell$ cd build +shell$ ../configure ...your options... +[...lots of output...] +shell$ make all install + +Parallel builds are also supported (although some versions of "make", +such as GNU make, will only use the first target listed on the command +line when executing parallel builds). For example (assume GNU make): + +shell$ make -j 4 all +[...lots of output...] +shell$ make install + +Parallel make is generally only helpful in the build phase; the +installation process is mostly serial and does not benefit much from +parallel make. + diff --git a/opal/mca/pmix/pmix1xx/pmix/LICENSE b/opal/mca/pmix/pmix1xx/pmix/LICENSE new file mode 100644 index 0000000000..469eedd6de --- /dev/null +++ b/opal/mca/pmix/pmix1xx/pmix/LICENSE @@ -0,0 +1,89 @@ +Most files in this release are marked with the copyrights of the +organizations who have edited them. The copyrights below are in no +particular order and generally reflect members of the Open MPI core +team who have contributed code to this release. The copyrights for +code used under license from other parties are included in the +corresponding files. + +Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana + University Research and Technology + Corporation. All rights reserved. +Copyright (c) 2004-2010 The University of Tennessee and The University + of Tennessee Research Foundation. All rights + reserved. +Copyright (c) 2004-2010 High Performance Computing Center Stuttgart, + University of Stuttgart. All rights reserved. +Copyright (c) 2004-2008 The Regents of the University of California. + All rights reserved. +Copyright (c) 2006-2010 Los Alamos National Security, LLC. All rights + reserved. +Copyright (c) 2006-2010 Cisco Systems, Inc. All rights reserved. +Copyright (c) 2006-2010 Voltaire, Inc. All rights reserved. +Copyright (c) 2006-2011 Sandia National Laboratories. All rights reserved.
+Copyright (c) 2006-2010 Sun Microsystems, Inc. All rights reserved. + Use is subject to license terms. +Copyright (c) 2006-2010 The University of Houston. All rights reserved. +Copyright (c) 2006-2009 Myricom, Inc. All rights reserved. +Copyright (c) 2007-2008 UT-Battelle, LLC. All rights reserved. +Copyright (c) 2007-2010 IBM Corporation. All rights reserved. +Copyright (c) 1998-2005 Forschungszentrum Juelich, Juelich Supercomputing + Centre, Federal Republic of Germany +Copyright (c) 2005-2008 ZIH, TU Dresden, Federal Republic of Germany +Copyright (c) 2007 Evergrid, Inc. All rights reserved. +Copyright (c) 2008 Chelsio, Inc. All rights reserved. +Copyright (c) 2008-2009 Institut National de Recherche en + Informatique. All rights reserved. +Copyright (c) 2007 Lawrence Livermore National Security, LLC. + All rights reserved. +Copyright (c) 2007-2009 Mellanox Technologies. All rights reserved. +Copyright (c) 2006-2010 QLogic Corporation. All rights reserved. +Copyright (c) 2008-2010 Oak Ridge National Labs. All rights reserved. +Copyright (c) 2006-2010 Oracle and/or its affiliates. All rights reserved. +Copyright (c) 2009 Bull SAS. All rights reserved. +Copyright (c) 2010 ARM ltd. All rights reserved. +Copyright (c) 2010-2011 Alex Brick . All rights reserved. +Copyright (c) 2012 The University of Wisconsin-La Crosse. All rights + reserved. +Copyright (c) 2013-2014 Intel, Inc. All rights reserved. +Copyright (c) 2011-2014 NVIDIA Corporation. All rights reserved. + +$COPYRIGHT$ + +Additional copyrights may follow + +$HEADER$ + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +- Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ +- Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer listed + in this license in the documentation and/or other materials + provided with the distribution. + +- Neither the name of the copyright holders nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +The copyright holders provide no reassurances that the source code +provided does not infringe any patent, copyright, or any other +intellectual property rights of third parties. The copyright holders +disclaim any liability to any recipient for claims brought against +recipient by any third party for infringement of that parties +intellectual property rights. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/opal/mca/pmix/pmix1xx/pmix/Makefile.am b/opal/mca/pmix/pmix1xx/pmix/Makefile.am new file mode 100644 index 0000000000..6260880043 --- /dev/null +++ b/opal/mca/pmix/pmix1xx/pmix/Makefile.am @@ -0,0 +1,68 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. 
+# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2006-2015 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved. +# Copyright (c) 2013-2015 Intel, Inc. All rights reserved +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# Note that the -I directory must *exactly* match what was specified +# via AC_CONFIG_MACRO_DIR in configure.ac. +ACLOCAL_AMFLAGS = -I ./config + +headers = +sources = +nodist_headers = + +# Only install the valgrind suppressions file if we're building in +# standalone mode +dist_pmixdata_DATA = +if ! PMIX_EMBEDDED_MODE +dist_pmixdata_DATA += contrib/pmix-valgrind.supp +endif + +EXTRA_DIST = README INSTALL VERSION LICENSE autogen.sh \ + config/pmix_get_version.sh + +EXTRA_DIST += \ + test/test_common.h test/cli_stages.h \ + test/server_callbacks.h test/test_fence.h \ + test/test_publish.h test/test_resolve_peers.h \ + test/test_spawn.h test/utils.h test/test_cd.h + +include config/Makefile.am +include include/Makefile.am +include src/class/Makefile.am +include src/include/Makefile.am +include src/buffer_ops/Makefile.am +include src/util/Makefile.am +include src/usock/Makefile.am +include src/client/Makefile.am +include src/server/Makefile.am +include src/sec/Makefile.am + +lib_LTLIBRARIES = libpmix.la + +libpmix_la_SOURCES = $(headers) $(sources) +libpmix_la_LDFLAGS = -version-info $(libpmix_so_version) + +if ! PMIX_EMBEDDED_MODE +SUBDIRS = . 
test examples +endif + +dist-hook: + env LS_COLORS= sh "$(top_srcdir)/config/distscript.sh" "$(top_srcdir)" "$(distdir)" "$(PMIX_VERSION)" "$(PMIX_REPO_REV)" + diff --git a/opal/mca/pmix/pmix1xx/pmix/README b/opal/mca/pmix/pmix1xx/pmix/README new file mode 100644 index 0000000000..9bdfda2824 --- /dev/null +++ b/opal/mca/pmix/pmix1xx/pmix/README @@ -0,0 +1,395 @@ +Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana + University Research and Technology + Corporation. All rights reserved. +Copyright (c) 2004-2007 The University of Tennessee and The University + of Tennessee Research Foundation. All rights + reserved. +Copyright (c) 2004-2008 High Performance Computing Center Stuttgart, + University of Stuttgart. All rights reserved. +Copyright (c) 2004-2007 The Regents of the University of California. + All rights reserved. +Copyright (c) 2006-2015 Cisco Systems, Inc. All rights reserved. +Copyright (c) 2006-2011 Mellanox Technologies. All rights reserved. +Copyright (c) 2006-2012 Oracle and/or its affiliates. All rights reserved. +Copyright (c) 2007 Myricom, Inc. All rights reserved. +Copyright (c) 2008 IBM Corporation. All rights reserved. +Copyright (c) 2010 Oak Ridge National Labs. All rights reserved. +Copyright (c) 2011 University of Houston. All rights reserved. +Copyright (c) 2013 Intel, Inc. All rights reserved +$COPYRIGHT$ + +Additional copyrights may follow + +$HEADER$ + +=========================================================================== + +When submitting questions and problems, be sure to include as much +extra information as possible. 
This web page details all the +information that we request in order to provide assistance: + + http://www.open-mpi.org/community/help/ + +The best way to report bugs, send comments, or ask questions is to +sign up on the user's and/or developer's mailing list (for user-level +and developer-level questions; when in doubt, send to the user's +list): + + pmix-users@open-mpi.org + pmix-devel@open-mpi.org + +Because of spam, only subscribers are allowed to post to these lists +(ensure that you subscribe with and post from exactly the same e-mail +address -- joe@example.com is considered different than +joe@mycomputer.example.com!). Visit these pages to subscribe to the +lists: + + http://www.open-mpi.org/mailman/listinfo.cgi/pmix-users + http://www.open-mpi.org/mailman/listinfo.cgi/pmix-devel + +Thanks for your time. + +=========================================================================== + +Much, much more information is also available in the PMIx FAQ: + + http://www.open-mpi.org/faq/ + +=========================================================================== + +The following abbreviated list of release notes applies to this code +base as of this writing (11 November 2013): + +General notes +------------- + +- The majority of PMIx's documentation is here in this file, the + included man pages, and on the web site FAQ + (http://www.open-mpi.org/projects/pmix). This will eventually be supplemented + with cohesive installation and user documentation files. + +- Note that PMIx documentation uses the word "component" + frequently; the word "plugin" is probably more familiar to most + users. As such, end users can probably completely substitute the + word "plugin" wherever you see "component" in our documentation. + For what it's worth, we use the word "component" for historical + reasons, mainly because it is part of our acronyms and internal API + function calls.
+ +- Systems that have been tested are: + - Linux (various flavors/distros), 32 bit, with gcc + - Linux (various flavors/distros), 64 bit (x86), with gcc, Absoft, + Intel, and Portland (*) + - OS X (10.5, 10.6, 10.7), 32 and 64 bit (x86_64), with gcc and + Absoft compilers (*) + + (*) Be sure to read the Compiler Notes, below. + +Compiler Notes +-------------- + +- The Portland Group compilers prior to version 7.0 require the + "-Msignextend" compiler flag to extend the sign bit when converting + from a shorter to longer integer. This is different than other + compilers (such as GNU). When compiling PMIx with the Portland + compiler suite, the following flags should be passed to PMIx's + configure script: + + shell$ ./configure CFLAGS=-Msignextend ... + + This will compile PMIx with the proper compile flags. + +- Running on nodes with different endian and/or different datatype + sizes within a single parallel job is supported in this release. + However, PMIx does not resize data when datatypes differ in size + (for example, sending a 4 byte double and receiving an 8 byte + double will fail). + + +=========================================================================== + +Building PMIx +----------------- + +PMIx uses a traditional configure script paired with "make" to +build. Typical installs can be of the pattern: + +--------------------------------------------------------------------------- +shell$ ./configure [...options...] +shell$ make all install +--------------------------------------------------------------------------- + +There are many available configure options (see "./configure --help" +for a full list); a summary of the more commonly used ones follows: + +INSTALLATION OPTIONS + +--prefix=<directory> + Install PMIx into the base directory named <directory>. Hence, + PMIx will place its executables in <directory>/bin, its header + files in <directory>/include, its libraries in <directory>/lib, etc.
+ +--disable-shared + By default, libpmix is built as a shared library, and all components + are built as dynamic shared objects (DSOs). This switch disables + this default; it is really only useful when used with + --enable-static. Specifically, this option does *not* imply + --enable-static; enabling static libraries and disabling shared + libraries are two independent options. + +--enable-static + Build libpmix as a static library, and statically link in all + components. Note that this option does *not* imply + --disable-shared; enabling static libraries and disabling shared + libraries are two independent options. + +--enable-dlopen + Build all of PMIx's components as standalone Dynamic Shared + Objects (DSO's) that are loaded at run-time. The opposite of this + option, --disable-dlopen, causes two things: + + 1. All of PMIx's components will be built as part of PMIx's + normal libraries (e.g., libpmix). + 2. PMIx will not attempt to open any DSO's at run-time. + + Note that this option does *not* imply that PMIx's libraries will be + built as static objects (e.g., libpmix.a). It only specifies the + location of PMIx's components: standalone DSOs or folded into the + PMIx libraries. You can control whether PMIx's libraries + are built as static or dynamic via --enable|disable-static and + --enable|disable-shared. + +--with-platform=FILE + Load configure options for the build from FILE. Options on the + command line that are not in FILE are also used. Options on the + command line and in FILE are replaced by what is in FILE. + +MISCELLANEOUS SUPPORT LIBRARIES + +--with-libltdl[=VALUE] + This option specifies where to find the GNU Libtool libltdl support + library. The following VALUEs are permitted: + + internal: Use PMIx's internal copy of libltdl. + external: Use an external libltdl installation (rely on default + compiler and linker paths to find it) + <no value>: Same as "internal".
+ <directory>: Specify the location of a specific libltdl + installation to use + + By default (or if --with-libltdl is specified with no VALUE), PMIx + will build and use the copy of libltdl that it has in its source + tree. However, if the VALUE is "external", PMIx will look for + the relevant libltdl header file and library in default compiler / + linker locations. Or, VALUE can be a directory tree where the + libltdl header file and library can be found. This option allows + operating systems to include PMIx and use their default libltdl + installation instead of PMIx's bundled libltdl. + + Note that this option is ignored if --disable-dlopen is specified. + +--with-threads=value + Since thread support is only partially tested, it is disabled by + default. To enable threading, use "--with-threads=posix". This is + most useful when combined with --enable-mpi-thread-multiple. + +Once PMIx has been built and installed, it is safe to run "make +clean" and/or remove the entire build tree. + +VPATH and parallel builds are fully supported. + +Generally speaking, the only thing that users need to do to use PMIx +is ensure that <prefix>/lib is in their LD_LIBRARY_PATH. Users may need to set +LD_LIBRARY_PATH in their shell setup files (e.g., .bashrc, .cshrc) +so that non-interactive rsh/ssh-based logins will be able to find the +PMIx library. + +=========================================================================== + +PMIx Version Numbers and Binary Compatibility +------------------------------------------------- + +PMIx has two sets of version numbers that are likely of interest +to end users / system administrators: + + * Software version number + * Shared library version numbers + +Both are described below, followed by a discussion of application +binary interface (ABI) compatibility implications. + +Software Version Number +----------------------- + +PMIx's version numbers are the union of several different values: +major, minor, release, and an optional quantifier.
+ + * Major: The major number is the first integer in the version string + (e.g., v1.2.3). Changes in the major number typically indicate a + significant change in the code base and/or end-user + functionality. The major number is always included in the version + number. + + * Minor: The minor number is the second integer in the version + string (e.g., v1.2.3). Changes in the minor number typically + indicate a incremental change in the code base and/or end-user + functionality. The minor number is always included in the version + number: + + o Even minor release numbers are part of "super-stable" + release series (e.g., v1.4.0). Releases in super stable series + are well-tested, time-tested, and mature. Such releases are + recommended for production sites. Changes between subsequent + releases in super stable series are expected to be fairly small. + o Odd minor release numbers are part of "feature" release + series (e.g., 1.3.7). Releases in feature releases are + well-tested, but they are not necessarily time-tested or as + mature as super stable releases. Changes between subsequent + releases in feature series may be large. + + * Release: The release number is the third integer in the version + string (e.g., v1.2.3). Changes in the release number typically + indicate a bug fix in the code base and/or end-user + functionality. If the release number is 0, it is omitted from the + version number (e.g., v1.2 has a release number of 0). + + * Quantifier: PMIx version numbers sometimes have an arbitrary + string affixed to the end of the version number. Common strings + include: + + o aX: Indicates an alpha release. X is an integer indicating + the number of the alpha release (e.g., v1.2.3a5 indicates the + 5th alpha release of version 1.2.3). + o bX: Indicates a beta release. X is an integer indicating + the number of the beta release (e.g., v1.2.3b3 indicates the 3rd + beta release of version 1.2.3). + o rcX: Indicates a release candidate. 
X is an integer + indicating the number of the release candidate (e.g., v1.2.3rc4 + indicates the 4th release candidate of version 1.2.3). + +Although the major, minor, and release values (and optional +quantifiers) are reported in PMIx nightly snapshot tarballs, the +filenames of these snapshot tarballs follow a slightly different +convention. + +Specifically, the snapshot tarball filename contains three distinct +values: + + * Most recent Git tag name on the branch from which the tarball was + created. + + * An integer indicating how many Git commits have occurred since + that Git tag. + + * The Git hash of the tip of the branch. + +For example, a snapshot tarball filename of +"pmix-v1.0.2-57-gb9f1fd9.tar.bz2" indicates that this tarball was +created from the v1.0 branch, 57 Git commits after the "v1.0.2" tag, +specifically at Git hash gb9f1fd9. + +PMIx's Git master branch contains a single "dev" tag. For example, +"pmix-dev-8-gf21c349.tar.bz2" represents a snapshot tarball created +from the master branch, 8 Git commits after the "dev" tag, +specifically at Git hash gf21c349. + +The exact value of the "number of Git commits past a tag" integer is +fairly meaningless; its sole purpose is to provide an easy, +human-recognizable ordering for snapshot tarballs. + +Shared Library Version Number +----------------------------- + +PMIx uses the GNU Libtool shared library versioning scheme. + +NOTE: Only official releases of PMIx adhere to this versioning + scheme. "Beta" releases, release candidates, and nightly + tarballs, developer snapshots, and Git snapshot tarballs likely + will all have arbitrary/meaningless shared library version + numbers. + +The GNU Libtool official documentation details how the versioning +scheme works. The quick version is that the shared library versions +are a triple of integers: (current,revision,age), or "c:r:a". This +triple is not related to the PMIx software version number.
There +are six simple rules for updating the values (taken almost verbatim +from the Libtool docs): + + 1. Start with version information of "0:0:0" for each shared library. + + 2. Update the version information only immediately before a public + release of your software. More frequent updates are unnecessary, + and only guarantee that the current interface number gets larger + faster. + + 3. If the library source code has changed at all since the last + update, then increment revision ("c:r:a" becomes "c:r+1:a"). + + 4. If any interfaces have been added, removed, or changed since the + last update, increment current, and set revision to 0. + + 5. If any interfaces have been added since the last public release, + then increment age. + + 6. If any interfaces have been removed since the last public release, + then set age to 0. + +Application Binary Interface (ABI) Compatibility +------------------------------------------------ + +PMIx provides forward ABI compatibility in all versions of a given +feature release series and its corresponding +super stable series. For example, on a single platform, an MPI +application linked against PMIx v1.3.2 shared libraries can be +updated to point to the shared libraries in any successive v1.3.x or +v1.4 release and still work properly (e.g., via the LD_LIBRARY_PATH +environment variable or other operating system mechanism). + +PMIx reserves the right to break ABI compatibility at new feature +release series. For example, the same MPI application from above +(linked against PMIx v1.3.2 shared libraries) will *not* work with +PMIx v1.5 shared libraries. + +=========================================================================== + +Common Questions +---------------- + +Many common questions about building and using PMIx are answered +on the FAQ: + + http://www.open-mpi.org/faq/ + +=========================================================================== + +Got more questions? +------------------- + +Found a bug? 
Got a question? Want to make a suggestion? Want to +contribute to PMIx? Please let us know! + +When submitting questions and problems, be sure to include as much +extra information as possible. This web page details all the +information that we request in order to provide assistance: + + http://www.open-mpi.org/community/help/ + +User-level questions and comments should generally be sent to the +user's mailing list (pmix-users@open-mpi.org). Because of spam, only +subscribers are allowed to post to this list (ensure that you +subscribe with and post from *exactly* the same e-mail address -- +joe@example.com is considered different than +joe@mycomputer.example.com!). Visit this page to subscribe to the +user's list: + + http://www.open-mpi.org/mailman/listinfo.cgi/pmix-users + +Developer-level bug reports, questions, and comments should generally +be sent to the developer's mailing list (pmix-devel@open-mpi.org). Please +do not post the same question to both lists. As with the user's list, +only subscribers are allowed to post to the developer's list. Visit +the following web page to subscribe: + + http://www.open-mpi.org/mailman/listinfo.cgi/pmix-devel + +Make today a PMIx day! diff --git a/opal/mca/pmix/pmix1xx/pmix/VERSION b/opal/mca/pmix/pmix1xx/pmix/VERSION new file mode 100644 index 0000000000..928f3d895e --- /dev/null +++ b/opal/mca/pmix/pmix1xx/pmix/VERSION @@ -0,0 +1,65 @@ +# Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved. +# Copyright (c) 2008-2011 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2011 NVIDIA Corporation. All rights reserved. +# Copyright (c) 2013 Mellanox Technologies, Inc. +# All rights reserved. +# Copyright (c) 2014-2015 Intel, Inc. All rights reserved. + +# This is the VERSION file for PMIx, describing the precise +# version of PMIx in this distribution. The various components of +# the version number below are combined to form a single version +# number string.
+ +# major, minor, and release are generally combined in the form +# ... + +major=1 +minor=1 +release=0 + +# greek is used for alpha or beta release tags. If it is non-empty, +# it will be appended to the version number. It does not have to be +# numeric. Common examples include a1 (alpha release 1), b1 or (beta release 1). +# The only requirement is that it must be entirely printable ASCII +# characters and have no white space. + +greek=a1 + +# If repo_rev is empty, then the repository version number will be +# obtained during "make dist" via the "git describe --tags --always" +# command, or with the date (if "git describe" fails) in the form of +# "date". + +repo_rev=gitebb9a6a + +# If tarball_version is not empty, it is used as the version string in +# the tarball filename, regardless of all other versions listed in +# this file. For example, if tarball_version is empty, the tarball +# filename will be of the form +# openmpi-...tar.*. However, if +# tarball_version is not empty, the tarball filename will be of the +# form openmpi-.tar.*. + +tarball_version= + +# The date when this release was created + +date="Aug 28, 2015" + +# The shared library version of each of PMIx's public libraries. +# These versions are maintained in accordance with the "Library +# Interface Versions" chapter from the GNU Libtool documentation. +# All changes in these version numbers are dictated by the PMIx +# release managers (not individual developers). Notes: + +# 1. Since these version numbers are associated with *releases*, the +# version numbers maintained on the PMIx Github trunk (and developer +# branches) is always 0:0:0 for all libraries. + +# 2. The version number of libpmix refers to the public pmix interfaces. +# It does not refer to any internal interfaces. + +# Version numbers are described in the Libtool current:revision:age +# format. 
+ +libpmix_so_version=0:0:0 diff --git a/opal/mca/pmix/pmix1xx/pmix/autogen.sh b/opal/mca/pmix/pmix1xx/pmix/autogen.sh new file mode 100755 index 0000000000..b5b509eac8 --- /dev/null +++ b/opal/mca/pmix/pmix1xx/pmix/autogen.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +# Run all the rest of the Autotools +echo "==> Running autoreconf"; +autoreconf ${autoreconf_args:-"-ivf"} diff --git a/opal/mca/pmix/pmix1xx/pmix/config/Makefile.am b/opal/mca/pmix/pmix1xx/pmix/config/Makefile.am new file mode 100644 index 0000000000..34871a3c72 --- /dev/null +++ b/opal/mca/pmix/pmix1xx/pmix/config/Makefile.am @@ -0,0 +1,49 @@ +# PMIx copyrights: +# Copyright (c) 2013 Intel, Inc. All rights reserved +# +######################### +# +# Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2006-2010 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2010 Oracle and/or its affiliates. All rights +# reserved. 
+######################### +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +EXTRA_DIST += \ + config/pmix_get_version.sh \ + config/distscript.sh \ + config/pmix_check_attributes.m4 \ + config/pmix_check_broken_qsort.m4 \ + config/pmix_check_compiler_version.m4 \ + config/pmix_check_ident.m4 \ + config/pmix_check_munge.m4 \ + config/pmix_check_package.m4 \ + config/pmix_check_sasl.m4 \ + config/pmix_check_vendor.m4 \ + config/pmix_check_visibility.m4 \ + config/pmix_ensure_contains_optflags.m4 \ + config/pmix_functions.m4 \ + config/pmix.m4 \ + config/pmix_search_libs.m4 \ + config/pmix_setup_cc.m4 \ + config/pmix_setup_hwloc.m4 \ + config/pmix_setup_libevent.m4 + + +maintainer-clean-local: + rm -f config/pmix_get_version.sh diff --git a/opal/mca/pmix/pmix1xx/pmix/config/distscript.sh b/opal/mca/pmix/pmix1xx/pmix/config/distscript.sh new file mode 100755 index 0000000000..fbb37a7871 --- /dev/null +++ b/opal/mca/pmix/pmix1xx/pmix/config/distscript.sh @@ -0,0 +1,56 @@ +#!/bin/sh +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2009-2015 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2015 Research Organization for Information Science +# and Technology (RIST). All rights reserved. +# Copyright (c) 2015 Los Alamos National Security, LLC. All rights +# reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +srcdir=$1 +builddir=$PWD +distdir=$builddir/$2 +PMIX_REPO_REV=$3 + +if test x"$2" = x ; then + echo "*** ERROR: Must supply relative distdir as argv[2] -- aborting" + exit 1 +elif test ! -d "$distdir" ; then + echo "*** ERROR: dist dir does not exist" + echo "*** ERROR: $distdir" + exit 1 +fi + +# We can catch some hard (but possible) to do mistakes by looking at +# our repo's revision, but only if we are in the source tree. +# Otherwise, use what configure told us, at the cost of allowing one +# or two corner cases in (but otherwise VPATH builds won't work). +repo_rev=$PMIX_REPO_REV +if test -d .git ; then + repo_rev=$(config/pmix_get_version.sh VERSION --repo-rev) +fi + +# +# Update VERSION:repo_rev with the best value we have. +# +perl -pi -e 's/^repo_rev=.*/repo_rev='$repo_rev'/' -- "${distdir}/VERSION" +# need to reset the timestamp to not annoy AM dependencies +touch -r "${srcdir}/VERSION" "${distdir}/VERSION" + +echo "*** Updated VERSION file with repo rev: $repo_rev" +echo "*** (via dist-hook / config/distscript.sh)" diff --git a/opal/mca/pmix/pmix1xx/pmix/config/pmix.m4 b/opal/mca/pmix/pmix1xx/pmix/config/pmix.m4 new file mode 100644 index 0000000000..cc89df1bef --- /dev/null +++ b/opal/mca/pmix/pmix1xx/pmix/config/pmix.m4 @@ -0,0 +1,759 @@ +dnl -*- shell-script -*- +dnl +dnl This code has been adapted from opal_configure_options.m4 in the Open MPI +dnl code base - per the Open MPI license, all copyrights are retained below. +dnl +dnl Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana +dnl University Research and Technology +dnl Corporation. All rights reserved. +dnl Copyright (c) 2004-2005 The University of Tennessee and The University +dnl of Tennessee Research Foundation. All rights +dnl reserved. +dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +dnl University of Stuttgart. All rights reserved. 
+dnl Copyright (c) 2004-2005 The Regents of the University of California. +dnl All rights reserved. +dnl Copyright (c) 2006-2015 Cisco Systems, Inc. All rights reserved. +dnl Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved. +dnl Copyright (c) 2009 IBM Corporation. All rights reserved. +dnl Copyright (c) 2009 Los Alamos National Security, LLC. All rights +dnl reserved. +dnl Copyright (c) 2009-2011 Oak Ridge National Labs. All rights reserved. +dnl Copyright (c) 2011-2013 NVIDIA Corporation. All rights reserved. +dnl Copyright (c) 2013-2015 Intel, Inc. All rights reserved +dnl +dnl $COPYRIGHT$ +dnl +dnl Additional copyrights may follow +dnl +dnl $HEADER$ +dnl + +AC_DEFUN([PMIX_SETUP_CORE],[ + + AC_REQUIRE([AC_USE_SYSTEM_EXTENSIONS]) + AC_REQUIRE([AC_CANONICAL_TARGET]) + AC_REQUIRE([AC_PROG_CC]) + + # If no prefix was defined, set a good value + m4_ifval([$1], + [m4_define([pmix_config_prefix],[$1/])], + [m4_define([pmix_config_prefix], [])]) + + # Get pmix's absolute top builddir (which may not be the same as + # the real $top_builddir) + PMIX_startdir=`pwd` + if test x"pmix_config_prefix" != "x" -a ! -d "pmix_config_prefix"; then + mkdir -p "pmix_config_prefix" + fi + if test x"pmix_config_prefix" != "x"; then + cd "pmix_config_prefix" + fi + PMIX_top_builddir=`pwd` + AC_SUBST(PMIX_top_builddir) + + # Get pmix's absolute top srcdir (which may not be the same as the + # real $top_srcdir. First, go back to the startdir incase the + # $srcdir is relative. 
+ + cd "$PMIX_startdir" + cd "$srcdir"/pmix_config_prefix + PMIX_top_srcdir="`pwd`" + AC_SUBST(PMIX_top_srcdir) + + # Go back to where we started + cd "$PMIX_startdir" + + AC_MSG_NOTICE([pmix builddir: $PMIX_top_builddir]) + AC_MSG_NOTICE([pmix srcdir: $PMIX_top_srcdir]) + if test "$PMIX_top_builddir" != "$PMIX_top_srcdir"; then + AC_MSG_NOTICE([Detected VPATH build]) + fi + + # Get the version of pmix that we are installing + AC_MSG_CHECKING([for pmix version]) + PMIX_VERSION="`$PMIX_top_srcdir/config/pmix_get_version.sh $PMIX_top_srcdir/VERSION`" + if test "$?" != "0"; then + AC_MSG_ERROR([Cannot continue]) + fi + PMIX_RELEASE_DATE="`$PMIX_top_srcdir/config/pmix_get_version.sh $PMIX_top_srcdir/VERSION --release-date`" + AC_SUBST(PMIX_VERSION) + AC_DEFINE_UNQUOTED([PMIX_VERSION], ["$PMIX_VERSION"], + [The library version is always available, contrary to VERSION]) + AC_SUBST(PMIX_RELEASE_DATE) + AC_MSG_RESULT([$PMIX_VERSION]) + + # Debug mode? + AC_MSG_CHECKING([if want pmix maintainer support]) + pmix_debug= + AS_IF([test "$pmix_debug" = "" -a "$enable_debug" = "yes"], + [pmix_debug=1 + pmix_debug_msg="enabled"]) + AS_IF([test "$pmix_debug" = ""], + [pmix_debug=0 + pmix_debug_msg="disabled"]) + # Grr; we use #ifndef for PMIX_DEBUG! :-( + AH_TEMPLATE(PMIX_ENABLE_DEBUG, [Whether we are in debugging mode or not]) + AS_IF([test "$pmix_debug" = "1"], [AC_DEFINE([PMIX_ENABLE_DEBUG])]) + AC_MSG_RESULT([$pmix_debug_msg]) + + AC_MSG_CHECKING([for pmix directory prefix]) + AC_MSG_RESULT(m4_ifval([$1], pmix_config_prefix, [(none)])) + + # Note that private/config.h *MUST* be listed first so that it + # becomes the "main" config header file. Any AC-CONFIG-HEADERS + # after that (pmix/config.h) will only have selective #defines + # replaced, not the entire file. 
+ AC_CONFIG_HEADERS(pmix_config_prefix[include/private/autogen/config.h]) + AC_CONFIG_HEADERS(pmix_config_prefix[include/pmix/autogen/config.h]) + AC_CONFIG_HEADERS(pmix_config_prefix[include/pmix/pmix_common.h]) + + # What prefix are we using? + AC_MSG_CHECKING([for pmix symbol prefix]) + AS_IF([test "$pmix_symbol_prefix_value" = ""], + [AS_IF([test "$with_pmix_symbol_prefix" = ""], + [pmix_symbol_prefix_value=pmix_], + [pmix_symbol_prefix_value=$with_pmix_symbol_prefix])]) + AC_DEFINE_UNQUOTED(PMIX_SYM_PREFIX, [$pmix_symbol_prefix_value], + [The pmix symbol prefix]) + # Ensure to [] escape the whole next line so that we can get the + # proper tr tokens + [pmix_symbol_prefix_value_caps="`echo $pmix_symbol_prefix_value | tr '[:lower:]' '[:upper:]'`"] + AC_DEFINE_UNQUOTED(PMIX_SYM_PREFIX_CAPS, [$pmix_symbol_prefix_value_caps], + [The pmix symbol prefix in all caps]) + AC_MSG_RESULT([$pmix_symbol_prefix_value]) + + # Give an easy #define to know if we need to transform all the + # pmix names + AH_TEMPLATE([PMIX_SYM_TRANSFORM], [Whether we need to re-define all the pmix public symbols or not]) + AS_IF([test "$pmix_symbol_prefix_value" = "pmix_"], + [AC_DEFINE([PMIX_SYM_TRANSFORM], [0])], + [AC_DEFINE([PMIX_SYM_TRANSFORM], [1])]) + + # GCC specifics. + if test "x$GCC" = "xyes"; then + PMIX_GCC_CFLAGS="-Wall -Wmissing-prototypes -Wundef" + PMIX_GCC_CFLAGS="$PMIX_GCC_CFLAGS -Wpointer-arith -Wcast-align" + fi + + ############################################################################ + # Check for compilers and preprocessors + ############################################################################ + pmix_show_title "Compiler and preprocessor tests" + + ################################## + # C compiler characteristics + ################################## + # Does the compiler support "ident"-like constructs? 
+ PMIX_CHECK_IDENT([CC], [CFLAGS], [c], [C]) + PMIX_SETUP_CC + + # + # Check for some types + # + + AC_CHECK_TYPES(int8_t) + AC_CHECK_TYPES(uint8_t) + AC_CHECK_TYPES(int16_t) + AC_CHECK_TYPES(uint16_t) + AC_CHECK_TYPES(int32_t) + AC_CHECK_TYPES(uint32_t) + AC_CHECK_TYPES(int64_t) + AC_CHECK_TYPES(uint64_t) + AC_CHECK_TYPES(long long) + + AC_CHECK_TYPES(intptr_t) + AC_CHECK_TYPES(uintptr_t) + AC_CHECK_TYPES(ptrdiff_t) + + # + # Check for type sizes + # + + AC_CHECK_SIZEOF(_Bool) + AC_CHECK_SIZEOF(char) + AC_CHECK_SIZEOF(short) + AC_CHECK_SIZEOF(int) + AC_CHECK_SIZEOF(long) + if test "$ac_cv_type_long_long" = yes; then + AC_CHECK_SIZEOF(long long) + fi + AC_CHECK_SIZEOF(float) + AC_CHECK_SIZEOF(double) + + AC_CHECK_SIZEOF(void *) + AC_CHECK_SIZEOF(size_t) + if test "$ac_cv_type_ssize_t" = yes ; then + AC_CHECK_SIZEOF(ssize_t) + fi + if test "$ac_cv_type_ptrdiff_t" = yes; then + AC_CHECK_SIZEOF(ptrdiff_t) + fi + AC_CHECK_SIZEOF(wchar_t) + + AC_CHECK_SIZEOF(pid_t) + + + # + # Does the C compiler native support "bool"? (i.e., without + # or any other help) + # + + PMIX_VAR_SCOPE_PUSH([MSG]) + AC_MSG_CHECKING(for C bool type) + AC_COMPILE_IFELSE([AC_LANG_PROGRAM([ + AC_INCLUDES_DEFAULT], + [[bool bar, foo = true; bar = foo;]])], + [PMIX_NEED_C_BOOL=0 MSG=yes],[PMIX_NEED_C_BOOL=1 MSG=no]) + AC_DEFINE_UNQUOTED(PMIX_NEED_C_BOOL, $PMIX_NEED_C_BOOL, + [Whether the C compiler supports "bool" without any other help (such as )]) + AC_MSG_RESULT([$MSG]) + AC_CHECK_SIZEOF(_Bool) + PMIX_VAR_SCOPE_POP + + # + # Check for other compiler characteristics + # + + PMIX_VAR_SCOPE_PUSH([PMIX_CFLAGS_save]) + if test "$GCC" = "yes"; then + + # gcc 2.96 will emit oodles of warnings if you use "inline" with + # -pedantic (which we do in developer builds). However, + # "__inline__" is ok. So we have to force gcc to select the + # right one. If you use -pedantic, the AC_C_INLINE test will fail + # (because it names a function foo() -- without the (void)). 
So + # we turn off all the picky flags, turn on -ansi mode (which is + # implied by -pedantic), and set warnings to be errors. Hence, + # this does the following (for 2.96): + # + # - causes the check for "inline" to emit a warning, which then + # fails + # - checks for __inline__, which then emits no error, and works + # + # This also works nicely for gcc 3.x because "inline" will work on + # the first check, and all is fine. :-) + + PMIX_CFLAGS_save=$CFLAGS + CFLAGS="$PMIX_CFLAGS_BEFORE_PICKY -Werror -ansi" + fi + AC_C_INLINE + if test "$GCC" = "yes"; then + CFLAGS=$PMIX_CFLAGS_save + fi + PMIX_VAR_SCOPE_POP + + if test "x$CC" = "xicc"; then + PMIX_CHECK_ICC_VARARGS + fi + + + ################################## + # Only after setting up + # C do we check compiler attributes. + ################################## + + pmix_show_subtitle "Compiler characteristics" + + PMIX_CHECK_ATTRIBUTES + PMIX_CHECK_COMPILER_VERSION_ID + + ################################## + # Header files + ################################## + + pmix_show_title "Header file tests" + + AC_CHECK_HEADERS([arpa/inet.h \ + fcntl.h inttypes.h libgen.h \ + netinet/in.h \ + stdint.h stddef.h \ + stdlib.h string.h strings.h \ + sys/param.h \ + sys/select.h sys/socket.h \ + stdarg.h sys/stat.h sys/time.h \ + sys/types.h sys/un.h sys/uio.h net/uio.h \ + sys/wait.h syslog.h \ + time.h unistd.h \ + crt_externs.h signal.h \ + ioLib.h sockLib.h hostLib.h limits.h]) + + # Note that sometimes we have , but it doesn't work (e.g., + # have both Portland and GNU installed; using pgcc will find GNU's + # , which all it does -- by standard -- is define "bool" to + # "_Bool" [see + # http://www.opengroup.org/onlinepubs/009695399/basedefs/stdbool.h.html], + # and Portland has no idea what to do with _Bool). + + # So first figure out if we have (i.e., check the value of + # the macro HAVE_STDBOOL_H from the result of AC_CHECK_HEADERS, + # above). If we do have it, then check to see if it actually works. 
+ # Define PMIX_USE_STDBOOL_H as approrpaite. + AC_CHECK_HEADERS([stdbool.h], [have_stdbool_h=1], [have_stdbool_h=0]) + AC_MSG_CHECKING([if works]) + if test "$have_stdbool_h" = "1"; then + AC_COMPILE_IFELSE([AC_LANG_PROGRAM([AC_INCLUDES_DEFAULT[ + #if HAVE_STDBOOL_H + #include + #endif + ]], + [[bool bar, foo = true; bar = foo;]])], + [PMIX_USE_STDBOOL_H=1 MSG=yes],[PMIX_USE_STDBOOL_H=0 MSG=no]) + else + PMIX_USE_STDBOOL_H=0 + MSG="no (don't have )" + fi + AC_DEFINE_UNQUOTED(PMIX_USE_STDBOOL_H, $PMIX_USE_STDBOOL_H, + [Whether to use or not]) + AC_MSG_RESULT([$MSG]) + + # checkpoint results + AC_CACHE_SAVE + + ################################## + # Types + ################################## + + pmix_show_title "Type tests" + + AC_CHECK_TYPES([socklen_t, struct sockaddr_in, struct sockaddr_un, + struct sockaddr_in6, struct sockaddr_storage], + [], [], [AC_INCLUDES_DEFAULT + #if HAVE_SYS_SOCKET_H + #include + #endif + #if HAVE_SYS_UN_H + #include + #endif + #ifdef HAVE_NETINET_IN_H + #include + #endif + ]) + + AC_CHECK_DECLS([AF_UNSPEC, PF_UNSPEC, AF_INET6, PF_INET6], + [], [], [AC_INCLUDES_DEFAULT + #if HAVE_SYS_SOCKET_H + #include + #endif + #ifdef HAVE_NETINET_IN_H + #include + #endif + ]) + + # SA_RESTART in signal.h + PMIX_VAR_SCOPE_PUSH([MSG2]) + AC_MSG_CHECKING([if SA_RESTART defined in signal.h]) + AC_EGREP_CPP(yes, [ + #include + #ifdef SA_RESTART + yes + #endif + ], [MSG2=yes VALUE=1], [MSG2=no VALUE=0]) + AC_DEFINE_UNQUOTED(PMIX_HAVE_SA_RESTART, $VALUE, + [Whether we have SA_RESTART in or not]) + AC_MSG_RESULT([$MSG2]) + PMIX_VAR_SCOPE_POP + + AC_CHECK_MEMBERS([struct sockaddr.sa_len], [], [], [ + #include + #if HAVE_SYS_SOCKET_H + #include + #endif + ]) + + AC_CHECK_MEMBERS([struct dirent.d_type], [], [], [ + #include + #include ]) + + AC_CHECK_MEMBERS([siginfo_t.si_fd],,,[#include ]) + AC_CHECK_MEMBERS([siginfo_t.si_band],,,[#include ]) + + # + # Checks for struct member names in struct statfs + # + AC_CHECK_MEMBERS([struct statfs.f_type], [], [], [ + 
AC_INCLUDES_DEFAULT + #ifdef HAVE_SYS_VFS_H + #include + #endif + #ifdef HAVE_SYS_STATFS_H + #include + #endif + ]) + + AC_CHECK_MEMBERS([struct statfs.f_fstypename], [], [], [ + AC_INCLUDES_DEFAULT + #ifdef HAVE_SYS_PARAM_H + #include + #endif + #ifdef HAVE_SYS_MOUNT_H + #include + #endif + #ifdef HAVE_SYS_VFS_H + #include + #endif + #ifdef HAVE_SYS_STATFS_H + #include + #endif + ]) + + # + # Checks for struct member names in struct statvfs + # + AC_CHECK_MEMBERS([struct statvfs.f_basetype], [], [], [ + AC_INCLUDES_DEFAULT + #ifdef HAVE_SYS_STATVFS_H + #include + #endif + ]) + + AC_CHECK_MEMBERS([struct statvfs.f_fstypename], [], [], [ + AC_INCLUDES_DEFAULT + #ifdef HAVE_SYS_STATVFS_H + #include + #endif + ]) + + # + # Check for ptrdiff type. Yes, there are platforms where + # sizeof(void*) != sizeof(long) (64 bit Windows, apparently). + # + AC_MSG_CHECKING([for pointer diff type]) + if test $ac_cv_type_ptrdiff_t = yes ; then + pmix_ptrdiff_t="ptrdiff_t" + pmix_ptrdiff_size=$ac_cv_sizeof_ptrdiff_t + elif test $ac_cv_sizeof_void_p -eq $ac_cv_sizeof_long ; then + pmix_ptrdiff_t="long" + pmix_ptrdiff_size=$ac_cv_sizeof_long + elif test $ac_cv_type_long_long = yes -a $ac_cv_sizeof_void_p -eq $ac_cv_sizeof_long_long ; then + pmix_ptrdiff_t="long long" + pmix_ptrdiff_size=$ac_cv_sizeof_long_long + #else + # AC_MSG_ERROR([Could not find datatype to emulate ptrdiff_t. 
Cannot continue]) + fi + AC_DEFINE_UNQUOTED([PMIX_PTRDIFF_TYPE], [$pmix_ptrdiff_t], + [type to use for ptrdiff_t]) + AC_MSG_RESULT([$pmix_ptrdiff_t (size: $pmix_ptrdiff_size)]) + + ################################## + # Libraries + ################################## + + pmix_show_title "Library and Function tests" + + PMIX_SEARCH_LIBS_CORE([socket], [socket]) + + # IRIX and CentOS have dirname in -lgen, usually in libc + PMIX_SEARCH_LIBS_CORE([dirname], [gen]) + + # Darwin doesn't need -lm, as it's a symlink to libSystem.dylib + PMIX_SEARCH_LIBS_CORE([ceil], [m]) + + AC_CHECK_FUNCS([asprintf snprintf vasprintf vsnprintf strsignal socketpair strncpy_s usleep]) + + # On some hosts, htonl is a define, so the AC_CHECK_FUNC will get + # confused. On others, it's in the standard library, but stubbed with + # the magic glibc foo as not implemented. and on other systems, it's + # just not there. This covers all cases. + AC_CACHE_CHECK([for htonl define], + [ompi_cv_htonl_define], + [AC_PREPROC_IFELSE([AC_LANG_PROGRAM([ + #ifdef HAVE_SYS_TYPES_H + #include + #endif + #ifdef HAVE_NETINET_IN_H + #include + #endif + #ifdef HAVE_ARPA_INET_H + #include + #endif],[ + #ifndef ntohl + #error "ntohl not defined" + #endif + ])], [ompi_cv_htonl_define=yes], [ompi_cv_htonl_define=no])]) + AC_CHECK_FUNC([htonl], [ompi_have_htonl=yes], [ompi_have_htonl=no]) + AS_IF([test "$ompi_cv_htonl_define" = "yes" -o "$ompi_have_htonl" = "yes"], + [AC_DEFINE_UNQUOTED([HAVE_UNIX_BYTESWAP], [1], + [whether unix byteswap routines -- htonl, htons, nothl, ntohs -- are available])]) + + # + # Make sure we can copy va_lists (need check declared, not linkable) + # + + AC_CHECK_DECL(va_copy, PMIX_HAVE_VA_COPY=1, PMIX_HAVE_VA_COPY=0, + [#include ]) + AC_DEFINE_UNQUOTED(PMIX_HAVE_VA_COPY, $PMIX_HAVE_VA_COPY, + [Whether we have va_copy or not]) + + AC_CHECK_DECL(__va_copy, PMIX_HAVE_UNDERSCORE_VA_COPY=1, + PMIX_HAVE_UNDERSCORE_VA_COPY=0, [#include ]) + AC_DEFINE_UNQUOTED(PMIX_HAVE_UNDERSCORE_VA_COPY, 
$PMIX_HAVE_UNDERSCORE_VA_COPY, + [Whether we have __va_copy or not]) + + AC_CHECK_DECLS(__func__) + + # checkpoint results + AC_CACHE_SAVE + + ################################## + # System-specific tests + ################################## + + pmix_show_title "System-specific tests" + + AC_C_BIGENDIAN + PMIX_CHECK_BROKEN_QSORT + + ################################## + # Visibility + ################################## + + # Check the visibility declspec at the end to avoid problem with + # the previous tests that are not necessarily prepared for + # the visibility feature. + pmix_show_title "Symbol visibility feature" + + PMIX_CHECK_VISIBILITY + + ################################## + # Libevent + ################################## + pmix_show_title "Libevent" + + PMIX_LIBEVENT_CONFIG + + ################################## + # HWLOC + ################################## + pmix_show_title "HWLOC" + + PMIX_HWLOC_CONFIG + + ################################## + # SASL + ################################## + pmix_show_title "SASL" + + PMIX_SASL_CONFIG + + ################################## + # Munge + ################################## + pmix_show_title "Munge" + + PMIX_MUNGE_CONFIG + + ############################################################################ + # final compiler config + ############################################################################ + + pmix_show_subtitle "Compiler flags" + + # + # This is needed for VPATH builds, so that it will -I the appropriate + # include directory. We delayed doing it until now just so that + # '-I$(top_srcdir)' doesn't show up in any of the configure output -- + # purely aesthetic. + # + # Because pmix_config.h is created by AC_CONFIG_HEADERS, we + # don't need to -I the builddir for pmix/include. However, if we + # are VPATH building, we do need to include the source directories. 
+    #
+    if test "$PMIX_top_builddir" != "$PMIX_top_srcdir"; then
+        # Note the embedded m4 directives here -- we must embed them
+        # rather than have successive assignments to these shell
+        # variables, lest the $(foo) names try to get evaluated here.
+        # Yuck!
+        CPPFLAGS='-I$(PMIX_top_srcdir) -I$(PMIX_top_builddir) -I$(PMIX_top_srcdir)/src -I$(PMIX_top_srcdir)/include -I$(PMIX_top_builddir)/include'" $CPPFLAGS"
+    else
+        CPPFLAGS='-I$(PMIX_top_srcdir) -I$(PMIX_top_srcdir)/src -I$(PMIX_top_srcdir)/include'" $CPPFLAGS"
+    fi
+
+    #
+    # Delayed the substitution of CFLAGS and CXXFLAGS until now because
+    # they may have been modified throughout the course of this script.
+    #
+
+    AC_SUBST(CFLAGS)
+    AC_SUBST(CPPFLAGS)
+
+    # pmixdatadir, pmixlibdir, and pmixinclude are essentially the same as
+    # pkg*dir, but will always be */pmix.
+    pmixdatadir='${datadir}/pmix'
+    pmixlibdir='${libdir}/pmix'
+    pmixincludedir='${includedir}/pmix'
+    AC_SUBST(pmixdatadir)
+    AC_SUBST(pmixlibdir)
+    AC_SUBST(pmixincludedir)
+
+    ############################################################################
+    # final output
+    ############################################################################
+
+    pmix_show_subtitle "Final output"
+
+    AC_CONFIG_FILES(pmix_config_prefix[Makefile])
+
+    # Success
+    $2
+])dnl
+
+# PMIX_DEFINE_ARGS
+# ----------------
+# Declare the PMIx configure command-line options and translate them
+# into shell variables and config-header defines.
+#
+# Shell variables set:
+#   pmix_mode                     "embedded" or "standalone"
+#   PMIX_DEVEL                    1 if a .git directory exists in the
+#                                 current directory, else 0
+#   WANT_PICKY_COMPILER           0 or 1
+#   WANT_DEBUG                    0 or 1
+#   WANT_PRETTY_PRINT_STACKTRACE  0 or 1
+#   WANT_TIMING                   0 or 1
+#   with_ident_string             ident string with %VERSION% expanded
+#
+# Config-header defines: PMIX_ENABLE_DEBUG,
+# PMIX_WANT_PRETTY_PRINT_STACKTRACE, PMIX_IDENT_STRING and
+# PMIX_ENABLE_TIMING.  When debugging is off, -DNDEBUG is prepended to
+# CFLAGS and CXXFLAGS.
+#
+# Compound conditions are written as "test A && test B" rather than
+# "test A -a B": the -a and -o operators of test are not portable.
+AC_DEFUN([PMIX_DEFINE_ARGS],[
+    # Embedded mode, or standalone?
+    AC_ARG_ENABLE([embedded-mode],
+                    [AC_HELP_STRING([--enable-embedded-mode],
+                                    [Using --enable-embedded-mode causes PMIx to skip a few configure checks and install nothing. It should only be used when building PMIx within the scope of a larger package.])])
+    # An unset or empty value can never equal "yes", so a single
+    # comparison is sufficient here.
+    AS_IF([test "$enable_embedded_mode" = "yes"],
+          [pmix_mode=embedded],
+          [pmix_mode=standalone])
+
+    # Change the symbol prefix?
+    AC_ARG_WITH([pmix-symbol-prefix],
+                AC_HELP_STRING([--with-pmix-symbol-prefix=STRING],
+                               [STRING can be any valid C symbol name. It will be prefixed to all public PMIx symbols. Default: "pmix_"]))
+
+#
+# Is this a developer copy?
+#
+# NOTE(review): this looks for .git relative to the directory that
+# configure is run from -- confirm that is intended for VPATH and
+# embedded builds.
+
+if test -d .git; then
+    PMIX_DEVEL=1
+else
+    PMIX_DEVEL=0
+fi
+
+
+#
+# Developer picky compiler options
+#
+
+AC_MSG_CHECKING([if want developer-level compiler pickyness])
+AC_ARG_ENABLE(picky,
+    AC_HELP_STRING([--enable-picky],
+                   [enable developer-level compiler pickyness when building PMIx (default: disabled)]))
+if test "$enable_picky" = "yes"; then
+    AC_MSG_RESULT([yes])
+    WANT_PICKY_COMPILER=1
+else
+    AC_MSG_RESULT([no])
+    WANT_PICKY_COMPILER=0
+fi
+#################### Early development override ####################
+# Developer copies default to picky unless the user said otherwise.
+if test "$WANT_PICKY_COMPILER" = "0" && test -z "$enable_picky" && test "$PMIX_DEVEL" = "1"; then
+    WANT_PICKY_COMPILER=1
+    echo "--> developer override: enable picky compiler by default"
+fi
+#################### Early development override ####################
+
+#
+# Developer debugging
+#
+
+AC_MSG_CHECKING([if want developer-level debugging code])
+AC_ARG_ENABLE(debug,
+    AC_HELP_STRING([--enable-debug],
+                   [enable developer-level debugging code (not for general PMIx users!) (default: disabled)]))
+if test "$enable_debug" = "yes"; then
+    AC_MSG_RESULT([yes])
+    WANT_DEBUG=1
+else
+    AC_MSG_RESULT([no])
+    WANT_DEBUG=0
+fi
+#################### Early development override ####################
+# Developer copies default to debugging unless the user said otherwise.
+if test "$WANT_DEBUG" = "0" && test -z "$enable_debug" && test "$PMIX_DEVEL" = "1"; then
+    WANT_DEBUG=1
+    echo "--> developer override: enable debugging code by default"
+fi
+#################### Early development override ####################
+if test "$WANT_DEBUG" = "0"; then
+    CFLAGS="-DNDEBUG $CFLAGS"
+    CXXFLAGS="-DNDEBUG $CXXFLAGS"
+fi
+AC_DEFINE_UNQUOTED(PMIX_ENABLE_DEBUG, $WANT_DEBUG,
+                   [Whether we want developer-level debugging code or not])
+
+AC_ARG_ENABLE(debug-symbols,
+    AC_HELP_STRING([--disable-debug-symbols],
+        [Disable adding compiler flags to enable debugging symbols if --enable-debug is specified. For non-debugging builds, this flag has no effect.]))
+
+#
+# Do we want the pretty-print stack trace feature?
+#
+
+AC_MSG_CHECKING([if want pretty-print stacktrace])
+AC_ARG_ENABLE([pretty-print-stacktrace],
+    [AC_HELP_STRING([--enable-pretty-print-stacktrace],
+                    [Pretty print stacktrace on process signal (default: enabled)])])
+if test "$enable_pretty_print_stacktrace" = "no" ; then
+    AC_MSG_RESULT([no])
+    WANT_PRETTY_PRINT_STACKTRACE=0
+else
+    AC_MSG_RESULT([yes])
+    WANT_PRETTY_PRINT_STACKTRACE=1
+fi
+AC_DEFINE_UNQUOTED([PMIX_WANT_PRETTY_PRINT_STACKTRACE],
+                   [$WANT_PRETTY_PRINT_STACKTRACE],
+                   [if want pretty-print stack trace feature])
+
+#
+# Ident string
+#
+AC_MSG_CHECKING([if want ident string])
+AC_ARG_WITH([ident-string],
+            [AC_HELP_STRING([--with-ident-string=STRING],
+                            [Embed an ident string into PMIx object files])])
+if test "$with_ident_string" = "" || test "$with_ident_string" = "no"; then
+    with_ident_string="%VERSION%"
+fi
+# This is complicated, because $PMIX_VERSION may have spaces in it.
+# So put the whole sed expr in single quotes -- i.e., directly
+# substitute %VERSION% for (not expanded) $PMIX_VERSION.
+with_ident_string="`echo $with_ident_string | sed -e 's/%VERSION%/$PMIX_VERSION/'`"
+
+# Now eval an echo of that so that the "$PMIX_VERSION" token is
+# replaced with its value.  Enclose the whole thing in "" so that it
+# ends up as 1 token.
+with_ident_string="`eval echo $with_ident_string`"
+
+AC_DEFINE_UNQUOTED([PMIX_IDENT_STRING], ["$with_ident_string"],
+                   [ident string for PMIX])
+AC_MSG_RESULT([$with_ident_string])
+
+#
+# Timing support
+#
+AC_MSG_CHECKING([if want developer-level timing support])
+AC_ARG_ENABLE(timing,
+              AC_HELP_STRING([--enable-timing],
+                             [enable developer-level timing code (default: disabled)]))
+if test "$enable_timing" = "yes"; then
+    AC_MSG_RESULT([yes])
+    WANT_TIMING=1
+else
+    AC_MSG_RESULT([no])
+    WANT_TIMING=0
+fi
+
+AC_DEFINE_UNQUOTED([PMIX_ENABLE_TIMING], [$WANT_TIMING],
+                   [Whether we want developer-level timing support or not])
+
+])dnl
+
+# Specify the symbol prefix
+# Stores the first macro argument in the shell variable
+# pmix_symbol_prefix_value.
+AC_DEFUN([PMIX_SET_SYMBOL_PREFIX],[
+    pmix_symbol_prefix_value=$1
+])dnl
+
+# This must be a standalone routine so that it can be called both by
+# PMIX_INIT and an external caller (if PMIX_INIT is not invoked).
+# The pmix_did_am_conditionals flag makes repeated invocations no-ops,
+# so each Automake conditional is only defined once.
+AC_DEFUN([PMIX_DO_AM_CONDITIONALS],[
+    AS_IF([test "$pmix_did_am_conditionals" != "yes"],[
+        AM_CONDITIONAL([PMIX_EMBEDDED_MODE], [test "x$pmix_mode" = "xembedded"])
+        AM_CONDITIONAL([PMIX_COMPILE_TIMING], [test "$WANT_TIMING" = "1"])
+        AM_CONDITIONAL([PMIX_WANT_MUNGE], [test "$pmix_munge_support" = "1"])
+        AM_CONDITIONAL([PMIX_WANT_SASL], [test "$pmix_sasl_support" = "1"])
+    ])
+    pmix_did_am_conditionals=yes
+])dnl
+
diff --git a/opal/mca/pmix/pmix1xx/pmix/config/pmix_check_attributes.m4 b/opal/mca/pmix/pmix1xx/pmix/config/pmix_check_attributes.m4
new file mode 100644
index 0000000000..65bdb100f8
--- /dev/null
+++ b/opal/mca/pmix/pmix1xx/pmix/config/pmix_check_attributes.m4
@@ -0,0 +1,591 @@
+# -*- shell-script -*-
+# PMIx copyrights:
+# Copyright (c) 2013      Intel, Inc. All rights reserved
+#
+#########################
+#
+# Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
+#                         University Research and Technology
+#                         Corporation.  All rights reserved.
+# Copyright (c) 2004-2005 The University of Tennessee and The University
+#                         of Tennessee Research Foundation.
All rights
+#                         reserved.
+# Copyright (c) 2004-2010 High Performance Computing Center Stuttgart,
+#                         University of Stuttgart.  All rights reserved.
+# Copyright (c) 2004-2005 The Regents of the University of California.
+#                         All rights reserved.
+# Copyright (c) 2009      Oak Ridge National Labs.  All rights reserved.
+# Copyright (c) 2010-2015 Cisco Systems, Inc.  All rights reserved.
+# Copyright (c) 2013      Mellanox Technologies, Inc.
+#                         All rights reserved.
+#########################
+# $COPYRIGHT$
+#
+# Additional copyrights may follow
+#
+# $HEADER$
+#
+
+#
+# Search the generated warnings for
+# keywords regarding skipping or ignoring certain attributes
+#   Intel: ignore
+#   Sun C++: skip
+#
+# Argument 1 is the attribute name.  If conftest.err is non-empty and
+# contains one of the keywords (case-insensitive), the cache variable
+# pmix_cv___attribute__NAME is reset to 0.
+#
+AC_DEFUN([_PMIX_ATTRIBUTE_FAIL_SEARCH],[
+    AC_REQUIRE([AC_PROG_GREP])
+    if test -s conftest.err ; then
+        # icc uses 'invalid attribute' and 'attribute "__XXX__" ignored'
+        # Sun 12.1 emits 'warning: attribute parameter "__printf__" is undefined'
+        for i in invalid ignore skip undefined ; do
+            # Traditional grep implementations do not support -q, so
+            # discard the output and rely on the exit status instead.
+            if $GREP -i $i conftest.err >/dev/null 2>&1 ; then
+                pmix_cv___attribute__[$1]=0
+                break
+            fi
+        done
+    fi
+])
+
+#
+# Check for one specific attribute by compiling with C and C++
+# and possibly using a cross-check.
+#
+# If the cross-check is defined, a static function "usage" should be
+# defined, which is to be called from main (to circumvent warnings
+# regarding unused function in main file)
+#       static int usage (int * argument);
+#
+# The last argument is for specific CFLAGS, that need to be set
+# for the compiler to generate a warning on the cross-check.
+# This may need adaption for future compilers / CFLAG-settings.
+#
+# Arguments:
+#   1 - attribute name; the result is cached in pmix_cv___attribute__NAME
+#   2 - declarations that use the attribute and must compile
+#   3 - optional cross-check source; the attribute only counts as
+#       supported if this source FAILS to compile with warnings
+#       treated as errors
+#   4 - extra CFLAGS used while compiling the cross-check
+#
+AC_DEFUN([_PMIX_CHECK_SPECIFIC_ATTRIBUTE], [
+    AC_MSG_CHECKING([for __attribute__([$1])])
+    AC_CACHE_VAL(pmix_cv___attribute__[$1], [
+        #
+        # Try to compile using the C compiler, then C++
+        #
+        AC_TRY_COMPILE([$2],[],
+                       [
+                        #
+                        # In case we did succeed: Fine, but was this due to the
+                        # attribute being ignored/skipped? Grep for IgNoRe/skip in conftest.err
+                        # and if found, reset the pmix_cv__attribute__var=0
+                        #
+                        pmix_cv___attribute__[$1]=1
+                        _PMIX_ATTRIBUTE_FAIL_SEARCH([$1])
+                       ],
+                       [pmix_cv___attribute__[$1]=0])
+
+        # Only test C++ if we're building Open MPI (i.e.,
+        # project_ompi).  PMIX and ORTE do not use C++ at all, so
+        # let's not add a C++ compiler into their requirement list.
+        m4_ifdef([project_ompi],
+                 [if test "$pmix_cv___attribute__[$1]" = "1" ; then
+                      AC_LANG_PUSH(C++)
+                      AC_TRY_COMPILE([
+                               extern "C" {
+                               $2
+                               }],[],
+                               [
+                                pmix_cv___attribute__[$1]=1
+                                _PMIX_ATTRIBUTE_FAIL_SEARCH([$1])
+                               ],[pmix_cv___attribute__[$1]=0])
+                      AC_LANG_POP(C++)
+                  fi])
+
+        #
+        # If the attribute is supported by both compilers,
+        # try to recompile a *cross-check*, IFF defined.
+        #
+        # Two chained test commands are used because the -a operator
+        # and parenthesised expressions of test are not portable.
+        #
+        if test "$pmix_cv___attribute__[$1]" = "1" && test "[$3]" != "" ; then
+            ac_c_werror_flag_safe=$ac_c_werror_flag
+            ac_c_werror_flag="yes"
+            CFLAGS_safe=$CFLAGS
+            CFLAGS="$CFLAGS [$4]"
+
+            AC_TRY_COMPILE([$3],
+                [
+                 int i=4711;
+                 i=usage(&i);
+                ],
+                [pmix_cv___attribute__[$1]=0],
+                [
+                 #
+                 # In case we did NOT succeed: Fine, but was this due to the
+                 # attribute being ignored? Grep for IgNoRe in conftest.err
+                 # and if found, reset the pmix_cv__attribute__var=0
+                 #
+                 pmix_cv___attribute__[$1]=1
+                 _PMIX_ATTRIBUTE_FAIL_SEARCH([$1])
+                ])
+
+            ac_c_werror_flag=$ac_c_werror_flag_safe
+            CFLAGS=$CFLAGS_safe
+        fi
+    ])
+
+    if test "$pmix_cv___attribute__[$1]" = "1" ; then
+        AC_MSG_RESULT([yes])
+    else
+        AC_MSG_RESULT([no])
+    fi
+])
+
+
+#
+# Test the availability of __attribute__ and with the help
+# of _PMIX_CHECK_SPECIFIC_ATTRIBUTE for the support of
+# particular attributes. Compilers, that do not support an
+# attribute most often fail with a warning (when the warning
+# level is set).
+# The compiler's output is parsed in _PMIX_ATTRIBUTE_FAIL_SEARCH
+#
+# To add a new attribute __NAME__ add the
+#    pmix_cv___attribute__NAME
+# add a new check with _PMIX_CHECK_SPECIFIC_ATTRIBUTE (possibly with a cross-check)
+#    _PMIX_CHECK_SPECIFIC_ATTRIBUTE([name], [int foo (int arg) __attribute__ ((__name__));], [], [])
+# and define the corresponding
+#    AC_DEFINE_UNQUOTED(PMIX_HAVE_ATTRIBUTE_NAME, [$pmix_cv___attribute__NAME],
+#                       [Whether your compiler has __attribute__ NAME or not])
+# and decide on a correct macro (in pmix/include/pmix_config_bottom.h):
+#    #  define __pmix_attribute_NAME(x)  __attribute__(__NAME__)
+#
+# Please use the "__"-notation of the attribute in order not to
+# clash with predefined names or macros (e.g. const, which some compilers
+# do not like..)
+#
+# Every pmix_cv___attribute__NAME variable that is passed to
+# AC_DEFINE_UNQUOTED at the end of the macro must be assigned on both
+# paths: set to 0 when __attribute__ itself is unsupported, and set by
+# a _PMIX_CHECK_SPECIFIC_ATTRIBUTE call otherwise.
+#
+
+
+AC_DEFUN([PMIX_CHECK_ATTRIBUTES], [
+  AC_LANG(C)
+  AC_MSG_CHECKING(for __attribute__)
+
+  AC_CACHE_VAL(pmix_cv___attribute__, [
+    AC_TRY_COMPILE(
+      [#include <stdlib.h>
+       /* Check for the longest available __attribute__ (since gcc-2.3) */
+       struct foo {
+           char a;
+           int x[2] __attribute__ ((__packed__));
+        };
+      ],
+      [],
+      [pmix_cv___attribute__=1],
+      [pmix_cv___attribute__=0])
+    ])
+  AC_DEFINE_UNQUOTED(PMIX_HAVE_ATTRIBUTE, [$pmix_cv___attribute__],
+                     [Whether your compiler has __attribute__ or not])
+
+#
+# Now that we know the compiler supports __attribute__ let's check which kind of
+# attributes are supported.
+#
+  if test "$pmix_cv___attribute__" = "0" ; then
+    AC_MSG_RESULT([no])
+    pmix_cv___attribute__aligned=0
+    pmix_cv___attribute__always_inline=0
+    pmix_cv___attribute__cold=0
+    pmix_cv___attribute__const=0
+    pmix_cv___attribute__deprecated=0
+    pmix_cv___attribute__deprecated_argument=0
+    pmix_cv___attribute__format=0
+    pmix_cv___attribute__format_funcptr=0
+    pmix_cv___attribute__hot=0
+    pmix_cv___attribute__malloc=0
+    pmix_cv___attribute__may_alias=0
+    pmix_cv___attribute__no_instrument_function=0
+    pmix_cv___attribute__nonnull=0
+    pmix_cv___attribute__noreturn=0
+    pmix_cv___attribute__noreturn_funcptr=0
+    pmix_cv___attribute__packed=0
+    pmix_cv___attribute__pure=0
+    pmix_cv___attribute__sentinel=0
+    pmix_cv___attribute__unused=0
+    pmix_cv___attribute__visibility=0
+    pmix_cv___attribute__warn_unused_result=0
+    pmix_cv___attribute__weak_alias=0
+    pmix_cv___attribute__destructor=0
+  else
+    AC_MSG_RESULT([yes])
+
+    _PMIX_CHECK_SPECIFIC_ATTRIBUTE([aligned],
+        [struct foo { char text[4]; }  __attribute__ ((__aligned__(8)));],
+        [],
+        [])
+
+    #
+    # Ignored by PGI-6.2.5; -- recognized by output-parser
+    #
+    _PMIX_CHECK_SPECIFIC_ATTRIBUTE([always_inline],
+        [int foo (int arg) __attribute__ ((__always_inline__));],
+        [],
+        [])
+
+    _PMIX_CHECK_SPECIFIC_ATTRIBUTE([cold],
+        [
+         int foo(int arg1, int arg2) __attribute__ ((__cold__));
+         int foo(int arg1, int arg2) { return arg1 * arg2 + arg1; }
+        ],
+        [],
+        [])
+
+    _PMIX_CHECK_SPECIFIC_ATTRIBUTE([const],
+        [
+         int foo(int arg1, int arg2) __attribute__ ((__const__));
+         int foo(int arg1, int arg2) { return arg1 * arg2 + arg1; }
+        ],
+        [],
+        [])
+
+    _PMIX_CHECK_SPECIFIC_ATTRIBUTE([deprecated],
+        [
+         int foo(int arg1, int arg2) __attribute__ ((__deprecated__));
+         int foo(int arg1, int arg2) { return arg1 * arg2 + arg1; }
+        ],
+        [],
+        [])
+
+    _PMIX_CHECK_SPECIFIC_ATTRIBUTE([deprecated_argument],
+        [
+         int foo(int arg1, int arg2) __attribute__ ((__deprecated__("compiler allows argument")));
+         int foo(int arg1, int arg2) { return arg1 * arg2 + arg1; }
+        ],
+        [],
+        [])
+
+    ATTRIBUTE_CFLAGS=
+    case "$pmix_c_vendor" in
+        gnu)
+            ATTRIBUTE_CFLAGS="-Wall"
+            ;;
+        intel)
+            # we want specifically the warning on format string conversion
+            ATTRIBUTE_CFLAGS="-we181"
+            ;;
+    esac
+    _PMIX_CHECK_SPECIFIC_ATTRIBUTE([format],
+        [
+         int this_printf (void *my_object, const char *my_format, ...) __attribute__ ((__format__ (__printf__, 2, 3)));
+        ],
+        [
+         static int usage (int * argument);
+         extern int this_printf (int arg1, const char *my_format, ...) __attribute__ ((__format__ (__printf__, 2, 3)));
+
+         static int usage (int * argument) {
+             return this_printf (*argument, "%d", argument); /* This should produce a format warning */
+         }
+         /* The autoconf-generated main-function is int main(), which produces a warning by itself */
+         int main(void);
+        ],
+        [$ATTRIBUTE_CFLAGS])
+
+    ATTRIBUTE_CFLAGS=
+    case "$pmix_c_vendor" in
+        gnu)
+            ATTRIBUTE_CFLAGS="-Wall"
+            ;;
+        intel)
+            # we want specifically the warning on format string conversion
+            ATTRIBUTE_CFLAGS="-we181"
+            ;;
+    esac
+    _PMIX_CHECK_SPECIFIC_ATTRIBUTE([format_funcptr],
+        [
+         int (*this_printf)(void *my_object, const char *my_format, ...) __attribute__ ((__format__ (__printf__, 2, 3)));
+        ],
+        [
+         static int usage (int * argument);
+         extern int (*this_printf) (int arg1, const char *my_format, ...) __attribute__ ((__format__ (__printf__, 2, 3)));
+
+         static int usage (int * argument) {
+             return (*this_printf) (*argument, "%d", argument); /* This should produce a format warning */
+         }
+         /* The autoconf-generated main-function is int main(), which produces a warning by itself */
+         int main(void);
+        ],
+        [$ATTRIBUTE_CFLAGS])
+
+    _PMIX_CHECK_SPECIFIC_ATTRIBUTE([hot],
+        [
+         int foo(int arg1, int arg2) __attribute__ ((__hot__));
+         int foo(int arg1, int arg2) { return arg1 * arg2 + arg1; }
+        ],
+        [],
+        [])
+
+    _PMIX_CHECK_SPECIFIC_ATTRIBUTE([malloc],
+        [
+#ifdef HAVE_STDLIB_H
+#  include <stdlib.h>
+#endif
+         int * foo(int arg1) __attribute__ ((__malloc__));
+         int * foo(int arg1) { return (int*) malloc(arg1); }
+        ],
+        [],
+        [])
+
+
+    #
+    # Attribute may_alias: No suitable cross-check available, that works for non-supporting compilers
+    # Ignored by intel-9.1.045 -- turn off with -wd1292
+    # Ignored by PGI-6.2.5; ignore not detected due to missing cross-check
+    #
+    _PMIX_CHECK_SPECIFIC_ATTRIBUTE([may_alias],
+        [int * p_value __attribute__ ((__may_alias__));],
+        [],
+        [])
+
+
+    _PMIX_CHECK_SPECIFIC_ATTRIBUTE([no_instrument_function],
+        [int * foo(int arg1) __attribute__ ((__no_instrument_function__));],
+        [],
+        [])
+
+
+    #
+    # Attribute nonnull:
+    # Ignored by intel-compiler 9.1.045 -- recognized by cross-check
+    # Ignored by PGI-6.2.5 (pgCC) -- recognized by cross-check
+    #
+    ATTRIBUTE_CFLAGS=
+    case "$pmix_c_vendor" in
+        gnu)
+            ATTRIBUTE_CFLAGS="-Wall"
+            ;;
+        intel)
+            # we do not want to get ignored attributes warnings, but rather real warnings
+            ATTRIBUTE_CFLAGS="-wd1292"
+            ;;
+    esac
+    _PMIX_CHECK_SPECIFIC_ATTRIBUTE([nonnull],
+        [
+         int square(int *arg) __attribute__ ((__nonnull__));
+         int square(int *arg) { return *arg; }
+        ],
+        [
+         static int usage(int * argument);
+         int square(int * argument) __attribute__ ((__nonnull__));
+         int square(int * argument) { return (*argument) * (*argument); }
+
+         static int usage(int * argument) {
+             return square( ((void*)0) );    /* This should produce an argument must be nonnull warning */
+         }
+         /* The autoconf-generated main-function is int main(), which produces a warning by itself */
+         int main(void);
+        ],
+        [$ATTRIBUTE_CFLAGS])
+
+
+    _PMIX_CHECK_SPECIFIC_ATTRIBUTE([noreturn],
+        [
+#ifdef HAVE_UNISTD_H
+#  include <unistd.h>
+#endif
+#ifdef HAVE_STDLIB_H
+#  include <stdlib.h>
+#endif
+         void fatal(int arg1) __attribute__ ((__noreturn__));
+         void fatal(int arg1) { exit(arg1); }
+        ],
+        [],
+        [])
+
+
+    _PMIX_CHECK_SPECIFIC_ATTRIBUTE([noreturn_funcptr],
+        [
+#ifdef HAVE_UNISTD_H
+#  include <unistd.h>
+#endif
+#ifdef HAVE_STDLIB_H
+#  include <stdlib.h>
+#endif
+         extern void (*fatal_exit)(int arg1) __attribute__ ((__noreturn__));
+         void fatal(int arg1) { fatal_exit (arg1); }
+        ],
+        [],
+        [$ATTRIBUTE_CFLAGS])
+
+
+    _PMIX_CHECK_SPECIFIC_ATTRIBUTE([packed],
+        [
+         struct foo {
+             char a;
+             int x[2] __attribute__ ((__packed__));
+         };
+        ],
+        [],
+        [])
+
+    _PMIX_CHECK_SPECIFIC_ATTRIBUTE([pure],
+        [
+         int square(int arg) __attribute__ ((__pure__));
+         int square(int arg) { return arg * arg; }
+        ],
+        [],
+        [])
+
+    #
+    # Attribute sentinel:
+    # Ignored by the intel-9.1.045 -- recognized by cross-check
+    #                intel-10.0beta works fine
+    # Ignored by PGI-6.2.5 (pgCC) -- recognized by output-parser and cross-check
+    # Ignored by pathcc-2.2.1 -- recognized by cross-check (through grep ignore)
+    #
+    ATTRIBUTE_CFLAGS=
+    case "$pmix_c_vendor" in
+        gnu)
+            ATTRIBUTE_CFLAGS="-Wall"
+            ;;
+        intel)
+            # we do not want to get ignored attributes warnings
+            ATTRIBUTE_CFLAGS="-wd1292"
+            ;;
+    esac
+    _PMIX_CHECK_SPECIFIC_ATTRIBUTE([sentinel],
+        [
+         int my_execlp(const char * file, const char *arg, ...) __attribute__ ((__sentinel__));
+        ],
+        [
+         static int usage(int * argument);
+         int my_execlp(const char * file, const char *arg, ...) __attribute__ ((__sentinel__));
+
+         static int usage(int * argument) {
+             void * last_arg_should_be_null = argument;
+             return my_execlp ("lala", "/home/there", last_arg_should_be_null);   /* This should produce a warning */
+         }
+         /* The autoconf-generated main-function is int main(), which produces a warning by itself */
+         int main(void);
+        ],
+        [$ATTRIBUTE_CFLAGS])
+
+    _PMIX_CHECK_SPECIFIC_ATTRIBUTE([unused],
+        [
+         int square(int arg1 __attribute__ ((__unused__)), int arg2);
+         int square(int arg1, int arg2) { return arg2; }
+        ],
+        [],
+        [])
+
+
+    #
+    # Ignored by PGI-6.2.5 (pgCC) -- recognized by the output-parser
+    #
+    _PMIX_CHECK_SPECIFIC_ATTRIBUTE([visibility],
+        [
+         int square(int arg1) __attribute__ ((__visibility__("hidden")));
+        ],
+        [],
+        [])
+
+
+    #
+    # Attribute warn_unused_result:
+    # Ignored by the intel-compiler 9.1.045 -- recognized by cross-check
+    # Ignored by pathcc-2.2.1 -- recognized by cross-check (through grep ignore)
+    #
+    ATTRIBUTE_CFLAGS=
+    case "$pmix_c_vendor" in
+        gnu)
+            ATTRIBUTE_CFLAGS="-Wall"
+            ;;
+        intel)
+            # we do not want to get ignored attributes warnings
+            ATTRIBUTE_CFLAGS="-wd1292"
+            ;;
+    esac
+    _PMIX_CHECK_SPECIFIC_ATTRIBUTE([warn_unused_result],
+        [
+         int foo(int arg) __attribute__ ((__warn_unused_result__));
+         int foo(int arg) { return arg + 3; }
+        ],
+        [
+         static int usage(int * argument);
+         int foo(int arg) __attribute__ ((__warn_unused_result__));
+
+         int foo(int arg) { return arg + 3; }
+         static int usage(int * argument) {
+           foo (*argument);        /* Should produce an unused result warning */
+           return 0;
+         }
+
+         /* The autoconf-generated main-function is int main(), which produces a warning by itself */
+         int main(void);
+        ],
+        [$ATTRIBUTE_CFLAGS])
+
+
+    #
+    # Attribute weak alias: without this check the cache variable
+    # used for PMIX_HAVE_ATTRIBUTE_WEAK_ALIAS below would be unset.
+    #
+    _PMIX_CHECK_SPECIFIC_ATTRIBUTE([weak_alias],
+        [
+         int foo(int arg);
+         int foo(int arg) { return arg + 3; }
+         int foo2(int arg) __attribute__ ((__weak__, __alias__("foo")));
+        ],
+        [],
+        [])
+
+
+    _PMIX_CHECK_SPECIFIC_ATTRIBUTE([destructor],
+        [
+         void foo(void) __attribute__ ((__destructor__));
+         void foo(void) { return ; }
+        ],
+        [],
+        [])
+  fi
+
+  # Now that all the values are set, define them
+
+  AC_DEFINE_UNQUOTED(PMIX_HAVE_ATTRIBUTE_ALIGNED, [$pmix_cv___attribute__aligned],
+                     [Whether your compiler has __attribute__ aligned or not])
+  AC_DEFINE_UNQUOTED(PMIX_HAVE_ATTRIBUTE_ALWAYS_INLINE, [$pmix_cv___attribute__always_inline],
+                     [Whether your compiler has __attribute__ always_inline or not])
+  AC_DEFINE_UNQUOTED(PMIX_HAVE_ATTRIBUTE_COLD, [$pmix_cv___attribute__cold],
+                     [Whether your compiler has __attribute__ cold or not])
+  AC_DEFINE_UNQUOTED(PMIX_HAVE_ATTRIBUTE_CONST, [$pmix_cv___attribute__const],
+                     [Whether your compiler has __attribute__ const or not])
+  AC_DEFINE_UNQUOTED(PMIX_HAVE_ATTRIBUTE_DEPRECATED, [$pmix_cv___attribute__deprecated],
+                     [Whether your compiler has __attribute__ deprecated or not])
+  AC_DEFINE_UNQUOTED(PMIX_HAVE_ATTRIBUTE_DEPRECATED_ARGUMENT, [$pmix_cv___attribute__deprecated_argument],
+                     [Whether your compiler has __attribute__ deprecated with optional argument])
+  AC_DEFINE_UNQUOTED(PMIX_HAVE_ATTRIBUTE_FORMAT, [$pmix_cv___attribute__format],
+                     [Whether your compiler has __attribute__ format or not])
+  AC_DEFINE_UNQUOTED(PMIX_HAVE_ATTRIBUTE_FORMAT_FUNCPTR, [$pmix_cv___attribute__format_funcptr],
+                     [Whether your compiler has __attribute__ format and it works on function pointers])
+  AC_DEFINE_UNQUOTED(PMIX_HAVE_ATTRIBUTE_HOT, [$pmix_cv___attribute__hot],
+                     [Whether your compiler has __attribute__ hot or not])
+  AC_DEFINE_UNQUOTED(PMIX_HAVE_ATTRIBUTE_MALLOC, [$pmix_cv___attribute__malloc],
+                     [Whether your compiler has __attribute__ malloc or not])
+  AC_DEFINE_UNQUOTED(PMIX_HAVE_ATTRIBUTE_MAY_ALIAS, [$pmix_cv___attribute__may_alias],
+                     [Whether your compiler has __attribute__ may_alias or not])
+  AC_DEFINE_UNQUOTED(PMIX_HAVE_ATTRIBUTE_NO_INSTRUMENT_FUNCTION, [$pmix_cv___attribute__no_instrument_function],
+                     [Whether your compiler has __attribute__ no_instrument_function or not])
+  AC_DEFINE_UNQUOTED(PMIX_HAVE_ATTRIBUTE_NONNULL, [$pmix_cv___attribute__nonnull],
+                     [Whether your compiler has __attribute__ nonnull or not])
+  AC_DEFINE_UNQUOTED(PMIX_HAVE_ATTRIBUTE_NORETURN, [$pmix_cv___attribute__noreturn],
+                     [Whether your compiler has __attribute__ noreturn or not])
+  AC_DEFINE_UNQUOTED(PMIX_HAVE_ATTRIBUTE_NORETURN_FUNCPTR, [$pmix_cv___attribute__noreturn_funcptr],
+                     [Whether your compiler has __attribute__ noreturn and it works on function pointers])
+  AC_DEFINE_UNQUOTED(PMIX_HAVE_ATTRIBUTE_PACKED, [$pmix_cv___attribute__packed],
+                     [Whether your compiler has __attribute__ packed or not])
+  AC_DEFINE_UNQUOTED(PMIX_HAVE_ATTRIBUTE_PURE, [$pmix_cv___attribute__pure],
+                     [Whether your compiler has __attribute__ pure or not])
+  AC_DEFINE_UNQUOTED(PMIX_HAVE_ATTRIBUTE_SENTINEL, [$pmix_cv___attribute__sentinel],
+                     [Whether your compiler has __attribute__ sentinel or not])
+  AC_DEFINE_UNQUOTED(PMIX_HAVE_ATTRIBUTE_UNUSED, [$pmix_cv___attribute__unused],
+                     [Whether your compiler has __attribute__ unused or not])
+  AC_DEFINE_UNQUOTED(PMIX_HAVE_ATTRIBUTE_VISIBILITY, [$pmix_cv___attribute__visibility],
+                     [Whether your compiler has __attribute__ visibility or not])
+  AC_DEFINE_UNQUOTED(PMIX_HAVE_ATTRIBUTE_WARN_UNUSED_RESULT, [$pmix_cv___attribute__warn_unused_result],
+                     [Whether your compiler has __attribute__ warn unused result or not])
+  AC_DEFINE_UNQUOTED(PMIX_HAVE_ATTRIBUTE_WEAK_ALIAS, [$pmix_cv___attribute__weak_alias],
+                     [Whether your compiler has __attribute__ weak alias or not])
+  AC_DEFINE_UNQUOTED(PMIX_HAVE_ATTRIBUTE_DESTRUCTOR, [$pmix_cv___attribute__destructor],
+                     [Whether your compiler has __attribute__ destructor or not])
+])
diff --git a/opal/mca/pmix/pmix1xx/pmix/config/pmix_check_broken_qsort.m4 b/opal/mca/pmix/pmix1xx/pmix/config/pmix_check_broken_qsort.m4
new file mode 100644
index 0000000000..443bd3bb9b
--- /dev/null
+++ b/opal/mca/pmix/pmix1xx/pmix/config/pmix_check_broken_qsort.m4
@@ -0,0 +1,55 @@
+dnl -*- shell-script -*-
+dnl
+dnl Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+dnl                         University Research and Technology
+dnl                         Corporation.  All rights reserved.
+dnl Copyright (c) 2004-2005 The University of Tennessee and The University
+dnl                         of Tennessee Research Foundation.  All rights
+dnl                         reserved.
+dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+dnl                         University of Stuttgart.  All rights reserved.
+dnl Copyright (c) 2004-2005 The Regents of the University of California.
+dnl                         All rights reserved.
+dnl Copyright (c) 2007      Sun Microsystems, Inc.  All rights reserved.
+dnl Copyright (c) 2014      Intel, Inc. All rights reserved.
+dnl Copyright (c) 2015      Cisco Systems, Inc.  All rights reserved.
+dnl $COPYRIGHT$
+dnl
+dnl Additional copyrights may follow
+dnl
+dnl $HEADER$
+dnl
+dnl There were reports of a broken qsort on Solaris that could cause
+dnl qsort to return a bad pointer, which in turn could cause failures.
+dnl The problem should have been corrected by these patches from SunSolve.
+dnl Solaris 10 should be free from this problem.
+dnl
+dnl 5.8_sparc #108827-27 or later
+dnl 5.8_x86   #108828-28 or later
+dnl 5.9_sparc #112874-20 or later
+dnl 5.9_x86   #114432-07 or later
+dnl
+dnl Users who cannot patch their systems, or who are convinced that their
+dnl native qsort is broken, can specify this configure flag to use
+dnl the pmix_qsort instead.
+
+# PMIX_CHECK_BROKEN_QSORT
+# --------------------------------------------------------
+# Adds the --with-broken-qsort configure option.  The macro reads no
+# positional arguments: it defines PMIX_HAVE_BROKEN_QSORT to 1 when the
+# option value is exactly "yes" and to 0 otherwise, and reports the
+# decision as the result of the "for broken qsort" check.
+AC_DEFUN([PMIX_CHECK_BROKEN_QSORT],[
+    AC_ARG_WITH([broken-qsort],
+                [AC_HELP_STRING([--with-broken-qsort],
+                                [Build with FreeBSD qsort instead of native qsort (default: no)])])
+    AC_MSG_CHECKING([for broken qsort])
+
+    # Only an explicit "yes" enables the replacement qsort
+    case "$with_broken_qsort" in
+        yes)
+            result="yes"
+            define_result=1
+            ;;
+        *)
+            result="no"
+            define_result=0
+            ;;
+    esac
+    AC_MSG_RESULT([$result])
+    AC_DEFINE_UNQUOTED([PMIX_HAVE_BROKEN_QSORT], [$define_result],
+                       [whether qsort is broken or not])
+])
diff --git a/opal/mca/pmix/pmix1xx/pmix/config/pmix_check_compiler_version.m4 b/opal/mca/pmix/pmix1xx/pmix/config/pmix_check_compiler_version.m4
new file mode 100644
index 0000000000..65010c365f
--- /dev/null
+++ b/opal/mca/pmix/pmix1xx/pmix/config/pmix_check_compiler_version.m4
@@ -0,0 +1,99 @@
+dnl -*- shell-script -*-
+dnl
+dnl PMIx copyrights:
+dnl Copyright (c) 2013      Intel, Inc. All rights reserved
+dnl
+dnl########################
+dnl This code has been adapted from pmix_setup_cc.m4 in the Open MPI
+dnl code base - per the Open MPI license, all copyrights are retained below.
+dnl
+dnl Copyright (c) 2009      Oak Ridge National Labs.  All rights reserved.
+dnl
+dnl########################
+dnl $COPYRIGHT$
+dnl
+dnl Additional copyrights may follow
+dnl
+dnl $HEADER$
+dnl
+
+
+# PMIX_CHECK_COMPILER_VERSION_ID()
+# ----------------------------------------------------
+# Try to figure out the compiler's name and version to detect cases,
+# where users compile Open MPI with one version and compile the application
+# with a different compiler.
+#
+AC_DEFUN([PMIX_CHECK_COMPILER_VERSION_ID],
+[
+    PMIX_CHECK_COMPILER(FAMILYID)
+    PMIX_CHECK_COMPILER_STRINGIFY(FAMILYNAME)
+    PMIX_CHECK_COMPILER(VERSION)
+    PMIX_CHECK_COMPILER_STRINGIFY(VERSION_STR)
+])dnl
+
+
+# PMIX_CHECK_COMPILER(FIELD)
+# ----------------------------------------------------
+# Build and run a small program that prints the integer value of the
+# PLATFORM_COMPILER_<FIELD> macro from pmix_portable_platform.h.  The
+# value is cached in pmix_cv_compiler_<FIELD> and defined as
+# PMIX_BUILD_PLATFORM_COMPILER_<FIELD>.  The cached value is 0 when the
+# test program fails and when cross-compiling.
+AC_DEFUN([PMIX_CHECK_COMPILER], [
+    lower=m4_tolower($1)
+    AC_CACHE_CHECK([for compiler $lower], pmix_cv_compiler_[$1],
+    [
+        # The platform header lives in the source tree, so add it to the
+        # preprocessor search path for the duration of this test only
+        CPPFLAGS_orig=$CPPFLAGS
+        CPPFLAGS="-I${top_pmix_srcdir}/src/include $CPPFLAGS"
+        AC_TRY_RUN([
+#include <stdio.h>
+#include <stdlib.h>
+#include "pmix_portable_platform.h"
+
+int main (int argc, char * argv[])
+{
+    FILE * f;
+    f=fopen("conftestval", "w");
+    if (!f) exit(1);
+    fprintf (f, "%d", PLATFORM_COMPILER_$1);
+    return 0;
+}
+        ], [
+            eval pmix_cv_compiler_$1=`cat conftestval`;
+        ], [
+            eval pmix_cv_compiler_$1=0
+        ], [
+            eval pmix_cv_compiler_$1=0
+        ])
+        CPPFLAGS=$CPPFLAGS_orig
+    ])
+    AC_DEFINE_UNQUOTED([PMIX_BUILD_PLATFORM_COMPILER_$1], $pmix_cv_compiler_[$1],
+                       [The compiler $lower which PMIX was built with])
+])dnl
+
+
+# PMIX_CHECK_COMPILER_STRINGIFY(FIELD)
+# ----------------------------------------------------
+# Same as PMIX_CHECK_COMPILER, but prints the stringified
+# PLATFORM_COMPILER_<FIELD> macro instead of an integer.  The cached
+# value is UNKNOWN when the test program fails and when cross-compiling.
+AC_DEFUN([PMIX_CHECK_COMPILER_STRINGIFY], [
+    lower=m4_tolower($1)
+    AC_CACHE_CHECK([for compiler $lower], pmix_cv_compiler_[$1],
+    [
+        # See the integer variant: temporary include path for the test
+        CPPFLAGS_orig=$CPPFLAGS
+        CPPFLAGS="-I${top_pmix_srcdir}/src/include $CPPFLAGS"
+        AC_TRY_RUN([
+#include <stdio.h>
+#include <stdlib.h>
+#include "pmix_portable_platform.h"
+
+int main (int argc, char * argv[])
+{
+    FILE * f;
+    f=fopen("conftestval", "w");
+    if (!f) exit(1);
+    fprintf (f, "%s", _STRINGIFY(PLATFORM_COMPILER_$1));
+    return 0;
+}
+        ], [
+            eval pmix_cv_compiler_$1=`cat conftestval`;
+        ], [
+            eval pmix_cv_compiler_$1=UNKNOWN
+        ], [
+            eval pmix_cv_compiler_$1=UNKNOWN
+        ])
+        CPPFLAGS=$CPPFLAGS_orig
+    ])
+    AC_DEFINE_UNQUOTED([PMIX_BUILD_PLATFORM_COMPILER_$1], $pmix_cv_compiler_[$1],
+                       [The compiler $lower which PMIX was built with])
+])dnl
diff --git a/opal/mca/pmix/pmix1xx/pmix/config/pmix_check_ident.m4 b/opal/mca/pmix/pmix1xx/pmix/config/pmix_check_ident.m4
new file mode 100644
index 0000000000..d49013ae27
--- /dev/null
+++ b/opal/mca/pmix/pmix1xx/pmix/config/pmix_check_ident.m4
@@ -0,0 +1,97 @@
+dnl
-*- shell-script -*-
+dnl
+dnl Copyright (c) 2007      Sun Microsystems, Inc.  All rights reserved.
+dnl $COPYRIGHT$
+dnl
+dnl Additional copyrights may follow
+dnl
+dnl $HEADER$
+dnl
+dnl defines:
+dnl   PMIX_$1_USE_PRAGMA_IDENT
+dnl   PMIX_$1_USE_IDENT
+dnl   PMIX_$1_USE_PRAGMA_COMMENT
+dnl   PMIX_$1_USE_CONST_CHAR_IDENT
+dnl
+
+# PMIX_CHECK_IDENT(compiler-env, compiler-flags,
+# file-suffix, lang) Try to compile a source file containing
+# a #pragma ident, and determine whether the ident was
+# inserted into the resulting object file
+#
+# The ident flavors are tried in order (#pragma ident, #ident,
+# #pragma comment), each one only if the previous one failed; a
+# static const char array is the last-resort fallback.  Exactly one
+# of the four PMIX_<compiler-env>_USE_* defines ends up as 1, the
+# others as 0.
+# -----------------------------------------------------------
+AC_DEFUN([PMIX_CHECK_IDENT], [
+    AC_MSG_CHECKING([for $4 ident string support])
+
+    # Every flag that is later handed to a define must start at 0, or
+    # the define would be empty or carry a stale value from a previous
+    # invocation for another language
+    pmix_pragma_ident_happy=0
+    pmix_ident_happy=0
+    pmix_pragma_comment_happy=0
+    pmix_static_const_char_happy=0
+    _PMIX_CHECK_IDENT(
+        [$1], [$2], [$3],
+        [[#]pragma ident], [],
+        [pmix_pragma_ident_happy=1
+         pmix_message="[#]pragma ident"],
+        _PMIX_CHECK_IDENT(
+            [$1], [$2], [$3],
+            [[#]ident], [],
+            [pmix_ident_happy=1
+             pmix_message="[#]ident"],
+            _PMIX_CHECK_IDENT(
+                [$1], [$2], [$3],
+                [[#]pragma comment(exestr, ], [)],
+                [pmix_pragma_comment_happy=1
+                 pmix_message="[#]pragma comment"],
+                [pmix_static_const_char_happy=1
+                 pmix_message="static const char[[]]"])))
+
+    AC_DEFINE_UNQUOTED([PMIX_$1_USE_PRAGMA_IDENT],
+        [$pmix_pragma_ident_happy], [Use #pragma ident strings for $4 files])
+    AC_DEFINE_UNQUOTED([PMIX_$1_USE_IDENT],
+        [$pmix_ident_happy], [Use #ident strings for $4 files])
+    AC_DEFINE_UNQUOTED([PMIX_$1_USE_PRAGMA_COMMENT],
+        [$pmix_pragma_comment_happy], [Use #pragma comment for $4 files])
+    AC_DEFINE_UNQUOTED([PMIX_$1_USE_CONST_CHAR_IDENT],
+        [$pmix_static_const_char_happy], [Use static const char[] strings for $4 files])
+
+    AC_MSG_RESULT([$pmix_message])
+
+    unset pmix_pragma_ident_happy pmix_ident_happy pmix_pragma_comment_happy pmix_static_const_char_happy pmix_message
+])
+
+# _PMIX_CHECK_IDENT(compiler-env, compiler-flags,
+# file-suffix, header_prefix, header_suffix, action-if-success, action-if-fail)
+# Try to compile a source file containing a #-style ident,
+# and
determine whether the ident was inserted into the
+# resulting object file
+#
+# NOTE(review): the text between the here-document redirection and
+# "&1 1>/dev/null" below appears to have been lost (the here-document
+# body, the compile step and the start of the object-file search are
+# missing).  The remaining tokens are reproduced unchanged; restore
+# the missing span from the upstream file before using this macro.
+# -----------------------------------------------------------
+AC_DEFUN([_PMIX_CHECK_IDENT], [
+    eval pmix_compiler="\$$1"
+    eval pmix_flags="\$$2"
+
+    pmix_ident="string_not_coincidentally_inserted_by_the_compiler"
+    cat > conftest.$3 <&1 1>/dev/null
+                             pmix_status=$?
+                             AS_IF([test "$pmix_output" != "" -o "$pmix_status" = "0"],
+                                   [$6],
+                                   [$7])],
+                            [PMIX_LOG_MSG([the failed program was:])
+                             PMIX_LOG_FILE([conftest.$3])
+                             $7]
+                            [$7])])
+
+    unset pmix_compiler pmix_flags pmix_output pmix_status
+    rm -rf conftest.* conftest${EXEEXT}
+])dnl
diff --git a/opal/mca/pmix/pmix1xx/pmix/config/pmix_check_munge.m4 b/opal/mca/pmix/pmix1xx/pmix/config/pmix_check_munge.m4
new file mode 100644
index 0000000000..baf873263d
--- /dev/null
+++ b/opal/mca/pmix/pmix1xx/pmix/config/pmix_check_munge.m4
@@ -0,0 +1,83 @@
+# -*- shell-script -*-
+#
+# Copyright (c) 2015      Intel, Inc. All rights reserved
+# Copyright (c) 2015      Cisco Systems, Inc.  All rights reserved.
+# $COPYRIGHT$
+#
+# Additional copyrights may follow
+#
+# $HEADER$
+#
+
+# PMIX_MUNGE_CONFIG
+# --------------------------------------------------------------------
+# Adds the --with-munge and --with-munge-libdir options and probes for
+# munge.h and munge_encode in libmunge.  The macro reads no positional
+# arguments.  On success CPPFLAGS, LDFLAGS and LIBS are extended and
+# PMIX_HAVE_MUNGE is defined to 1; otherwise it is defined to 0.
+# configure aborts when --with-munge was given with a value other
+# than "no" and munge could not be found.
+AC_DEFUN([PMIX_MUNGE_CONFIG],[
+
+    PMIX_VAR_SCOPE_PUSH([pmix_munge_dir pmix_munge_libdir])
+
+    AC_ARG_WITH([munge],
+                [AC_HELP_STRING([--with-munge=DIR],
+                                [Search for munge headers and libraries in DIR ])])
+
+    AC_ARG_WITH([munge-libdir],
+                [AC_HELP_STRING([--with-munge-libdir=DIR],
+                                [Search for munge libraries in DIR ])])
+
+    pmix_munge_support=0
+    if test "$with_munge" != "no"; then
+        AC_MSG_CHECKING([for munge in])
+        # Separate test invocations joined with && are used because the
+        # -a and -o operators of test are not portable
+        if test -n "$with_munge" && test "$with_munge" != "yes"; then
+            # NOTE(review): the package check appends /include to this
+            # prefix, so the include/munge branch yields a search path
+            # ending in include/munge/include -- confirm it is intended
+            if test -d "$with_munge/include/munge"; then
+                pmix_munge_dir=$with_munge/include/munge
+            else
+                pmix_munge_dir=$with_munge
+            fi
+            if test -d "$with_munge/lib"; then
+                pmix_munge_libdir=$with_munge/lib
+            elif test -d "$with_munge/lib64"; then
+                pmix_munge_libdir=$with_munge/lib64
+            else
+                AC_MSG_RESULT([Could not find $with_munge/lib or $with_munge/lib64])
+                AC_MSG_ERROR([Can not continue])
+            fi
+            AC_MSG_RESULT([$pmix_munge_dir and $pmix_munge_libdir])
+        else
+            AC_MSG_RESULT([(default search paths)])
+            pmix_munge_dir=
+        fi
+        # An explicit library directory overrides the one derived above
+        AS_IF([test ! -z "$with_munge_libdir" && test "$with_munge_libdir" != "yes"],
+              [pmix_munge_libdir="$with_munge_libdir"])
+
+        PMIX_CHECK_PACKAGE([pmix_munge],
+                           [munge.h],
+                           [munge],
+                           [munge_encode],
+                           [-lmunge],
+                           [$pmix_munge_dir],
+                           [$pmix_munge_libdir],
+                           [pmix_munge_support=1],
+                           [pmix_munge_support=0])
+        # Plain = is the only string-equality operator that every
+        # implementation of test accepts
+        if test "$pmix_munge_support" = "1"; then
+            CPPFLAGS="$pmix_munge_CPPFLAGS $CPPFLAGS"
+            LIBS="$LIBS -lmunge"
+            LDFLAGS="$pmix_munge_LDFLAGS $LDFLAGS"
+        fi
+    fi
+
+    # An explicit request that cannot be satisfied is fatal
+    if test ! -z "$with_munge" && test "$with_munge" != "no" && test "$pmix_munge_support" != "1"; then
+        AC_MSG_WARN([MUNGE SUPPORT REQUESTED AND NOT FOUND.])
+        AC_MSG_ERROR([CANNOT CONTINUE])
+    fi
+
+    AC_MSG_CHECKING([will munge support be built])
+    if test "$pmix_munge_support" != "1"; then
+        AC_MSG_RESULT([no])
+    else
+        AC_MSG_RESULT([yes])
+    fi
+
+    AC_DEFINE_UNQUOTED([PMIX_HAVE_MUNGE], [$pmix_munge_support],
+                       [Whether we have munge support or not])
+
+    PMIX_VAR_SCOPE_POP
+])dnl
diff --git a/opal/mca/pmix/pmix1xx/pmix/config/pmix_check_package.m4 b/opal/mca/pmix/pmix1xx/pmix/config/pmix_check_package.m4
new file mode 100644
index 0000000000..861d07a20e
--- /dev/null
+++ b/opal/mca/pmix/pmix1xx/pmix/config/pmix_check_package.m4
@@ -0,0 +1,176 @@
+# -*- shell-script -*-
+#
+# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+#                         University Research and Technology
+#                         Corporation.  All rights reserved.
+# Copyright (c) 2004-2005 The University of Tennessee and The University
+#                         of Tennessee Research Foundation.  All rights
+#                         reserved.
+# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+#                         University of Stuttgart.  All rights reserved.
+# Copyright (c) 2004-2005 The Regents of the University of California.
+#                         All rights reserved.
+# Copyright (c) 2012-2015 Cisco Systems, Inc.  All rights reserved.
+# Copyright (c) 2012      Oracle and/or its affiliates.  All rights reserved.
+# Copyright (c) 2014      Intel, Inc. All rights reserved.
+# $COPYRIGHT$
+#
+# Additional copyrights may follow
+#
+# $HEADER$
+#
+
+# _PMIX_CHECK_PACKAGE_HEADER(prefix, header, dir-prefix,
+#                            [action-if-found], [action-if-not-found],
+#                            includes)
+# --------------------------------------------------------------------
+# Look for header.  When dir-prefix is /usr or /usr/local the header is
+# first tried with the unmodified preprocessor flags; if that is skipped
+# or fails, -I<dir-prefix>/include is appended to both <prefix>_CPPFLAGS
+# and CPPFLAGS (when dir-prefix is non-empty) and the check is repeated
+# with the optional includes.
+AC_DEFUN([_PMIX_CHECK_PACKAGE_HEADER], [
+    # This is stolen from autoconf to peek under the covers to get the
+    # cache variable for the library check.  one should not copy this
+    # code into other places unless you want much pain and suffering
+    AS_VAR_PUSHDEF([pmix_Header], [ac_cv_header_$2])
+
+    # so this sucks, but there's no way to get through the progression
+    # of header includes without killing off the cache variable and trying
+    # again...
+    unset pmix_Header
+
+    pmix_check_package_header_happy="no"
+    # chained test invocations instead of the non-portable -o operator
+    AS_IF([test "$3" = "/usr" || test "$3" = "/usr/local"],
+           [ # try as is...
+            AC_VERBOSE([looking for header without includes])
+            AC_CHECK_HEADERS([$2], [pmix_check_package_header_happy="yes"], [])
+            AS_IF([test "$pmix_check_package_header_happy" = "no"],
+                  [# no go on the as is - reset the cache and try again
+                   unset pmix_Header])])
+
+    AS_IF([test "$pmix_check_package_header_happy" = "no"],
+          [AS_IF([test "$3" != ""],
+                 [$1_CPPFLAGS="$$1_CPPFLAGS -I$3/include"
+                  CPPFLAGS="$CPPFLAGS -I$3/include"])
+          AC_CHECK_HEADERS([$2], [pmix_check_package_header_happy="yes"], [], [$6])
+          AS_IF([test "$pmix_check_package_header_happy" = "yes"], [$4], [$5])],
+          [$4])
+    unset pmix_check_package_header_happy
+
+    AS_VAR_POPDEF([pmix_Header])dnl
+])
+
+
+# _PMIX_CHECK_PACKAGE_LIB(prefix, library, function, extra-libraries,
+#                         dir-prefix, libdir,
+#                         [action-if-found], [action-if-not-found]])
+# --------------------------------------------------------------------
+# Look for function in library.  When libdir is given, only
+# -L<libdir> is tried.  Otherwise the linker default path is tried
+# first (only when dir-prefix is empty, /usr or /usr/local), then
+# <dir-prefix>/lib, then <dir-prefix>/lib64.  After a failed attempt
+# LDFLAGS and <prefix>_LDFLAGS are reset to the values saved by the
+# caller in the pmix_check_package_<prefix>_* shell variables.  On
+# success <prefix>_LIBS is set to "-l<library> <extra-libraries>".
+AC_DEFUN([_PMIX_CHECK_PACKAGE_LIB], [
+    # This is stolen from autoconf to peek under the covers to get the
+    # cache variable for the library check.  one should not copy this
+    # code into other places unless you want much pain and suffering
+    AS_LITERAL_IF([$2],
+                  [AS_VAR_PUSHDEF([pmix_Lib], [ac_cv_lib_$2_$3])],
+                  [AS_VAR_PUSHDEF([pmix_Lib], [ac_cv_lib_$2''_$3])])dnl
+
+    # see comment above
+    unset pmix_Lib
+    pmix_check_package_lib_happy="no"
+    AS_IF([test "$6" != ""],
+          [ # libdir was specified - search only there
+           $1_LDFLAGS="$$1_LDFLAGS -L$6"
+           LDFLAGS="$LDFLAGS -L$6"
+           AC_CHECK_LIB([$2], [$3],
+                        [pmix_check_package_lib_happy="yes"],
+                        [pmix_check_package_lib_happy="no"], [$4])
+           AS_IF([test "$pmix_check_package_lib_happy" = "no"],
+                 [LDFLAGS="$pmix_check_package_$1_save_LDFLAGS"
+                  $1_LDFLAGS="$pmix_check_package_$1_orig_LDFLAGS"
+                  unset pmix_Lib])],
+          [ # libdir was not specified - go through search path
+           pmix_check_package_libdir="$5"
+           # chained test invocations instead of the non-portable -o operator
+           AS_IF([test "$pmix_check_package_libdir" = "" || test "$pmix_check_package_libdir" = "/usr" || test "$pmix_check_package_libdir" = "/usr/local"],
+               [ # try as is...
+                AC_VERBOSE([looking for library without search path])
+                AC_CHECK_LIB([$2], [$3],
+                        [pmix_check_package_lib_happy="yes"],
+                        [pmix_check_package_lib_happy="no"], [$4])
+                AS_IF([test "$pmix_check_package_lib_happy" = "no"],
+                    [ # no go on the as is..  see what happens later...
+                     LDFLAGS="$pmix_check_package_$1_save_LDFLAGS"
+                     $1_LDFLAGS="$pmix_check_package_$1_orig_LDFLAGS"
+                     unset pmix_Lib])])
+
+           AS_IF([test "$pmix_check_package_lib_happy" = "no"],
+               [AS_IF([test "$pmix_check_package_libdir" != ""],
+                    [$1_LDFLAGS="$$1_LDFLAGS -L$pmix_check_package_libdir/lib"
+                     LDFLAGS="$LDFLAGS -L$pmix_check_package_libdir/lib"
+                     AC_VERBOSE([looking for library in lib])
+                     AC_CHECK_LIB([$2], [$3],
+                               [pmix_check_package_lib_happy="yes"],
+                               [pmix_check_package_lib_happy="no"], [$4])
+                     AS_IF([test "$pmix_check_package_lib_happy" = "no"],
+                         [ # no go on the as is..  see what happens later...
+                          LDFLAGS="$pmix_check_package_$1_save_LDFLAGS"
+                          $1_LDFLAGS="$pmix_check_package_$1_orig_LDFLAGS"
+                          unset pmix_Lib])])])
+
+           AS_IF([test "$pmix_check_package_lib_happy" = "no"],
+               [AS_IF([test "$pmix_check_package_libdir" != ""],
+                    [$1_LDFLAGS="$$1_LDFLAGS -L$pmix_check_package_libdir/lib64"
+                     LDFLAGS="$LDFLAGS -L$pmix_check_package_libdir/lib64"
+                     AC_VERBOSE([looking for library in lib64])
+                     AC_CHECK_LIB([$2], [$3],
+                               [pmix_check_package_lib_happy="yes"],
+                               [pmix_check_package_lib_happy="no"], [$4])
+                     AS_IF([test "$pmix_check_package_lib_happy" = "no"],
+                         [ # no go on the as is..  see what happens later...
+                          LDFLAGS="$pmix_check_package_$1_save_LDFLAGS"
+                          $1_LDFLAGS="$pmix_check_package_$1_orig_LDFLAGS"
+                          unset pmix_Lib])])])])
+
+    AS_IF([test "$pmix_check_package_lib_happy" = "yes"],
+          [$1_LIBS="-l$2 $4"
+           $7], [$8])
+
+    AS_VAR_POPDEF([pmix_Lib])dnl
+])
+
+
+# PMIX_CHECK_PACKAGE(prefix,
+#                    header,
+#                    library,
+#                    function,
+#                    extra-libraries,
+#                    dir-prefix,
+#                    libdir-prefix,
+#                    [action-if-found], [action-if-not-found],
+#                    includes)
+# -----------------------------------------------------------
+# check for package defined by header and libs, and probably
+# located in dir-prefix, possibly with libs in libdir-prefix.
+# Both dir-prefix and libdir-prefix can be empty.  Will set
+# prefix_{CPPFLAGS, LDFLAGS, LIBS} as needed
+#
+# The global CPPFLAGS, LDFLAGS and LIBS are always restored to their
+# values on entry; on failure the prefix_* variables are restored too.
+AC_DEFUN([PMIX_CHECK_PACKAGE],[
+    # global flags on entry, restored unconditionally at the end
+    pmix_check_package_$1_save_CPPFLAGS="$CPPFLAGS"
+    pmix_check_package_$1_save_LDFLAGS="$LDFLAGS"
+    pmix_check_package_$1_save_LIBS="$LIBS"
+
+    # per-package flags on entry, restored only when the package is not found
+    pmix_check_package_$1_orig_CPPFLAGS="$$1_CPPFLAGS"
+    pmix_check_package_$1_orig_LDFLAGS="$$1_LDFLAGS"
+    pmix_check_package_$1_orig_LIBS="$$1_LIBS"
+
+    _PMIX_CHECK_PACKAGE_HEADER([$1], [$2], [$6],
+          [_PMIX_CHECK_PACKAGE_LIB([$1], [$3], [$4], [$5], [$6], [$7],
+                [pmix_check_package_happy="yes"],
+                [pmix_check_package_happy="no"])],
+          [pmix_check_package_happy="no"],
+          [$10])
+
+    AS_IF([test "$pmix_check_package_happy" = "yes"],
+          [$8],
+          [$1_CPPFLAGS="$pmix_check_package_$1_orig_CPPFLAGS"
+           $1_LDFLAGS="$pmix_check_package_$1_orig_LDFLAGS"
+           $1_LIBS="$pmix_check_package_$1_orig_LIBS"
+           $9])
+
+    CPPFLAGS="$pmix_check_package_$1_save_CPPFLAGS"
+    LDFLAGS="$pmix_check_package_$1_save_LDFLAGS"
+    LIBS="$pmix_check_package_$1_save_LIBS"
+])
diff --git a/opal/mca/pmix/pmix1xx/pmix/config/pmix_check_sasl.m4 b/opal/mca/pmix/pmix1xx/pmix/config/pmix_check_sasl.m4
new file mode 100644
index 0000000000..dddb4a1540
--- /dev/null
+++ b/opal/mca/pmix/pmix1xx/pmix/config/pmix_check_sasl.m4
@@ -0,0 +1,80 @@
+# -*- shell-script -*-
+#
+# Copyright (c) 2015      Intel, Inc.
All rights reserved
+# Copyright (c) 2015      Cisco Systems, Inc.  All rights reserved.
+# $COPYRIGHT$
+#
+# Additional copyrights may follow
+#
+# $HEADER$
+#
+
+# PMIX_SASL_CONFIG
+# --------------------------------------------------------------------
+# Adds the --with-sasl (default: no) and --with-sasl-libdir options and
+# probes for sasl/sasl.h and sasl_server_init in libsasl2.  The macro
+# reads no positional arguments.  On success CPPFLAGS, LDFLAGS and LIBS
+# are extended and PMIX_HAVE_SASL is defined to 1; otherwise it is
+# defined to 0.  configure aborts when sasl was requested but could
+# not be found.
+AC_DEFUN([PMIX_SASL_CONFIG],[
+
+    PMIX_VAR_SCOPE_PUSH([pmix_sasl_dir pmix_sasl_libdir])
+
+    AC_ARG_WITH([sasl],
+                [AC_HELP_STRING([--with-sasl=DIR],
+                                [Search for sasl headers and libraries in DIR ])],
+                [], [with_sasl=no])
+
+    AC_ARG_WITH([sasl-libdir],
+                [AC_HELP_STRING([--with-sasl-libdir=DIR],
+                                [Search for sasl libraries in DIR ])])
+
+    pmix_sasl_support=0
+    if test "$with_sasl" != "no"; then
+        AC_MSG_CHECKING([for sasl in])
+        # Separate test invocations joined with && are used because the
+        # -a and -o operators of test are not portable
+        if test -n "$with_sasl" && test "$with_sasl" != "yes"; then
+            # The package check appends /include to this prefix and the
+            # header is requested as sasl/sasl.h, so the prefix must be
+            # the installation root rather than its include/sasl directory
+            pmix_sasl_dir=$with_sasl
+            if test -d "$with_sasl/lib"; then
+                pmix_sasl_libdir=$with_sasl/lib
+            elif test -d "$with_sasl/lib64"; then
+                pmix_sasl_libdir=$with_sasl/lib64
+            else
+                AC_MSG_RESULT([Could not find $with_sasl/lib or $with_sasl/lib64])
+                AC_MSG_ERROR([Can not continue])
+            fi
+            AC_MSG_RESULT([$pmix_sasl_dir and $pmix_sasl_libdir])
+        else
+            AC_MSG_RESULT([(default search paths)])
+            pmix_sasl_dir=
+        fi
+        # An explicit library directory overrides the one derived above
+        AS_IF([test ! -z "$with_sasl_libdir" && test "$with_sasl_libdir" != "yes"],
+              [pmix_sasl_libdir="$with_sasl_libdir"])
+
+        PMIX_CHECK_PACKAGE([pmix_sasl],
+                           [sasl/sasl.h],
+                           [sasl2],
+                           [sasl_server_init],
+                           [-lsasl2],
+                           [$pmix_sasl_dir],
+                           [$pmix_sasl_libdir],
+                           [pmix_sasl_support=1],
+                           [pmix_sasl_support=0])
+        # Plain = is the only string-equality operator that every
+        # implementation of test accepts
+        if test "$pmix_sasl_support" = "1"; then
+            CPPFLAGS="$pmix_sasl_CPPFLAGS $CPPFLAGS"
+            LIBS="$LIBS -lsasl2"
+            LDFLAGS="$pmix_sasl_LDFLAGS $LDFLAGS"
+        fi
+    fi
+
+    # An explicit request that cannot be satisfied is fatal
+    if test ! -z "$with_sasl" && test "$with_sasl" != "no" && test "$pmix_sasl_support" != "1"; then
+        AC_MSG_WARN([SASL SUPPORT REQUESTED AND NOT FOUND.])
+        AC_MSG_ERROR([CANNOT CONTINUE])
+    fi
+
+    AC_MSG_CHECKING([will sasl support be built])
+    if test "$pmix_sasl_support" != "1"; then
+        AC_MSG_RESULT([no])
+    else
+        AC_MSG_RESULT([yes])
+    fi
+
+    AC_DEFINE_UNQUOTED(PMIX_HAVE_SASL, [$pmix_sasl_support],
+                       [Whether we have sasl support or not])
+
+    PMIX_VAR_SCOPE_POP
+])dnl
diff --git a/opal/mca/pmix/pmix1xx/pmix/config/pmix_check_vendor.m4 b/opal/mca/pmix/pmix1xx/pmix/config/pmix_check_vendor.m4
new file mode 100644
index 0000000000..fa631aea26
--- /dev/null
+++ b/opal/mca/pmix/pmix1xx/pmix/config/pmix_check_vendor.m4
@@ -0,0 +1,252 @@
+dnl -*- shell-script -*-
+dnl
+dnl PMIx copyrights:
+dnl Copyright (c) 2013      Intel, Inc. All rights reserved
+dnl
+dnl########################
+dnl This code has been adapted from pmix_check_vendor.m4 in the Open MPI
+dnl code base - per the Open MPI license, all copyrights are retained below.
+dnl
+dnl Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+dnl                         University Research and Technology
+dnl                         Corporation.  All rights reserved.
+dnl Copyright (c) 2004-2005 The University of Tennessee and The University
+dnl                         of Tennessee Research Foundation.  All rights
+dnl                         reserved.
+dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+dnl                         University of Stuttgart.  All rights reserved.
+dnl Copyright (c) 2004-2005 The Regents of the University of California.
+dnl                         All rights reserved.
+dnl Copyright (c) 2012      Oracle and/or its affiliates.  All rights reserved.
+dnl########################
+dnl Copyright (c) 2015      Cisco Systems, Inc.  All rights reserved.
+dnl $COPYRIGHT$
+dnl
+dnl Additional copyrights may follow
+dnl
+dnl $HEADER$
+dnl
+
+
+# PMIX_C_COMPILER_VENDOR(VENDOR_VARIABLE)
+# ---------------------------------------
+# Set shell variable VENDOR_VARIABLE to the name of the compiler
+# vendor for the current C compiler.
+#
+# See comment for _PMIX_CHECK_COMPILER_VENDOR for a complete
+# list of currently detected compilers.
+#
+# The detection result is cached in pmix_cv_c_compiler_vendor.
+AC_DEFUN([PMIX_C_COMPILER_VENDOR], [
+    AC_REQUIRE([AC_PROG_CC])
+
+    AC_CACHE_CHECK([for the C compiler vendor],
+        [pmix_cv_c_compiler_vendor],
+        [AC_LANG_PUSH(C)
+         _PMIX_CHECK_COMPILER_VENDOR([pmix_cv_c_compiler_vendor])
+         AC_LANG_POP(C)])
+
+    $1="$pmix_cv_c_compiler_vendor"
+])
+
+
+# workaround to avoid syntax error with Autoconf < 2.68:
+m4_ifndef([AC_LANG_DEFINES_PROVIDED],
+          [m4_define([AC_LANG_DEFINES_PROVIDED])])
+
+# PMIX_IFDEF_IFELSE(symbol, [action-if-defined],
+#                   [action-if-not-defined])
+# ----------------------------------------------
+# Run compiler to determine if preprocessor symbol "symbol" is
+# defined by the compiler.
+#
+# The test source fails to compile (via #error) when the symbol is
+# not defined.
+AC_DEFUN([PMIX_IFDEF_IFELSE], [
+    AC_COMPILE_IFELSE([AC_LANG_DEFINES_PROVIDED
+#ifndef $1
+#error "symbol $1 not defined"
+choke me
+#endif], [$2], [$3])])
+
+
+# PMIX_IF_IFELSE(condition, [action-if-true],
+#                [action-if-false])
+# ----------------------------------------------
+# Run compiler to determine if the preprocessor condition
+# "condition" (any expression valid after #if) evaluates to true.
+# The test source fails to compile (via #error) when it does not.
+AC_DEFUN([PMIX_IF_IFELSE], [
+    AC_COMPILE_IFELSE([AC_LANG_DEFINES_PROVIDED
+#if !( $1 )
+#error "condition $1 not met"
+choke me
+#endif], [$2], [$3])])
+
+
+# _PMIX_CHECK_COMPILER_VENDOR(VENDOR_VARIABLE)
+# --------------------------------------------
+# Set shell variable VENDOR_VARIABLE to the name of the compiler
+# vendor for the compiler for the current language.  Language must be
+# one of C, OBJC, or C++.
+#
+# The checks below run in order and the first one that matches wins;
+# every later check is skipped once the result is no longer "unknown".
+# Possible results: intel, gnu, borland, comeau, compaq, cray, diab,
+# digital mars, hp, ibm, kai, lcc, metaware high, metrowerks, sgi, mpw,
+# norcroft, pelles, portland group, sas, sun, tendra, tiny, usl, watcom,
+# or unknown.
+#
+# thanks to http://predef.sourceforge.net/precomp.html for the list
+# of defines to check.
+AC_DEFUN([_PMIX_CHECK_COMPILER_VENDOR], [
+    pmix_check_compiler_vendor_result="unknown"
+
+    # GNU is probably the most common, so check that one as soon as
+    # possible.  Intel pretends to be GNU, so need to check Intel
+    # before checking for GNU.
+
+    # Intel
+    AS_IF([test "$pmix_check_compiler_vendor_result" = "unknown"],
+          [PMIX_IF_IFELSE([defined(__INTEL_COMPILER) || defined(__ICC)],
+               [pmix_check_compiler_vendor_result="intel"])])
+
+    # GNU
+    AS_IF([test "$pmix_check_compiler_vendor_result" = "unknown"],
+          [PMIX_IFDEF_IFELSE([__GNUC__],
+               [pmix_check_compiler_vendor_result="gnu"
+
+               # We do not support gccfss as a compiler so die if
+               # someone tries to use said compiler.  gccfss (gcc
+               # for SPARC Systems) is a compiler that is no longer
+               # supported by Oracle and it has some major flaws
+               # that prevents it from actually compiling PMIX code.
+               # So if we detect it we automatically bail.
+
+               if ($CC --version | grep gccfss) >/dev/null 2>&1; then
+                   AC_MSG_RESULT([gccfss])
+                   AC_MSG_WARN([Detected gccfss being used to compile Open MPI.])
+                   AC_MSG_WARN([Because of several issues Open MPI does not support])
+                   AC_MSG_WARN([the gccfss compiler.  Please use a different compiler.])
+                   AC_MSG_WARN([If you didn't think you used gccfss you may want to])
+                   AC_MSG_WARN([check to see if the compiler you think you used is])
+                   AC_MSG_WARN([actually a link to gccfss.])
+                   AC_MSG_ERROR([Cannot continue])
+               fi])])
+
+    # Borland Turbo C
+    AS_IF([test "$pmix_check_compiler_vendor_result" = "unknown"],
+          [PMIX_IFDEF_IFELSE([__TURBOC__],
+               [pmix_check_compiler_vendor_result="borland"])])
+
+    # Borland C++
+    AS_IF([test "$pmix_check_compiler_vendor_result" = "unknown"],
+          [PMIX_IFDEF_IFELSE([__BORLANDC__],
+               [pmix_check_compiler_vendor_result="borland"])])
+
+    # Comeau C++
+    AS_IF([test "$pmix_check_compiler_vendor_result" = "unknown"],
+          [PMIX_IFDEF_IFELSE([__COMO__],
+               [pmix_check_compiler_vendor_result="comeau"])])
+
+    # Compaq C/C++
+    AS_IF([test "$pmix_check_compiler_vendor_result" = "unknown"],
+          [PMIX_IF_IFELSE([defined(__DECC) || defined(VAXC) || defined(__VAXC)],
+               [pmix_check_compiler_vendor_result="compaq"],
+               [PMIX_IF_IFELSE([defined(__osf__) && defined(__LANGUAGE_C__)],
+                    [pmix_check_compiler_vendor_result="compaq"],
+                    [PMIX_IFDEF_IFELSE([__DECCXX],
+                         [pmix_check_compiler_vendor_result="compaq"])])])])
+
+    # Cray C/C++
+    AS_IF([test "$pmix_check_compiler_vendor_result" = "unknown"],
+          [PMIX_IFDEF_IFELSE([_CRAYC],
+               [pmix_check_compiler_vendor_result="cray"])])
+
+    # Diab C/C++
+    AS_IF([test "$pmix_check_compiler_vendor_result" = "unknown"],
+          [PMIX_IFDEF_IFELSE([__DCC__],
+               [pmix_check_compiler_vendor_result="diab"])])
+
+    # Digital Mars
+    AS_IF([test "$pmix_check_compiler_vendor_result" = "unknown"],
+          [PMIX_IF_IFELSE([defined(__DMC__) || defined(__SC__) || defined(__ZTC__)],
+               [pmix_check_compiler_vendor_result="digital mars"])])
+
+    # HP ANSI C / aC++
+    AS_IF([test "$pmix_check_compiler_vendor_result" = "unknown"],
+          [PMIX_IF_IFELSE([defined(__HP_cc) || defined(__HP_aCC)],
+               [pmix_check_compiler_vendor_result="hp"])])
+
+    # IBM XL C/C++
+    AS_IF([test "$pmix_check_compiler_vendor_result" = "unknown"],
+          [PMIX_IF_IFELSE([defined(__xlC__) || defined(__IBMC__) || defined(__IBMCPP__)],
+               [pmix_check_compiler_vendor_result="ibm"],
+               [PMIX_IF_IFELSE([defined(_AIX) && !defined(__GNUC__)],
+                    [pmix_check_compiler_vendor_result="ibm"])])])
+
+    # KAI C++ (rest in peace)
+    AS_IF([test "$pmix_check_compiler_vendor_result" = "unknown"],
+          [PMIX_IFDEF_IFELSE([__KCC],
+               [pmix_check_compiler_vendor_result="kai"])])
+
+    # LCC
+    AS_IF([test "$pmix_check_compiler_vendor_result" = "unknown"],
+          [PMIX_IFDEF_IFELSE([__LCC__],
+               [pmix_check_compiler_vendor_result="lcc"])])
+
+    # MetaWare High C/C++
+    AS_IF([test "$pmix_check_compiler_vendor_result" = "unknown"],
+          [PMIX_IFDEF_IFELSE([__HIGHC__],
+               [pmix_check_compiler_vendor_result="metaware high"])])
+
+    # Metrowerks Codewarrior
+    AS_IF([test "$pmix_check_compiler_vendor_result" = "unknown"],
+          [PMIX_IFDEF_IFELSE([__MWERKS__],
+               [pmix_check_compiler_vendor_result="metrowerks"])])
+
+    # MIPSpro (SGI)
+    AS_IF([test "$pmix_check_compiler_vendor_result" = "unknown"],
+          [PMIX_IF_IFELSE([defined(sgi) || defined(__sgi)], [pmix_check_compiler_vendor_result="sgi"])])
+
+    # MPW C++
+    AS_IF([test "$pmix_check_compiler_vendor_result" = "unknown"],
+          [PMIX_IF_IFELSE([defined(__MRC__) || defined(MPW_C) || defined(MPW_CPLUS)],
+               [pmix_check_compiler_vendor_result="mpw"])])
+
+    # Norcroft C
+    AS_IF([test "$pmix_check_compiler_vendor_result" = "unknown"],
+          [PMIX_IFDEF_IFELSE([__CC_NORCROFT],
+               [pmix_check_compiler_vendor_result="norcroft"])])
+
+    # Pelles C
+    AS_IF([test "$pmix_check_compiler_vendor_result" = "unknown"],
+          [PMIX_IFDEF_IFELSE([__POCC__],
+               [pmix_check_compiler_vendor_result="pelles"])])
+
+    # Portland Group
+    AS_IF([test "$pmix_check_compiler_vendor_result" = "unknown"],
+          [PMIX_IFDEF_IFELSE([__PGI],
+               [pmix_check_compiler_vendor_result="portland group"])])
+
+    # SAS/C
+    AS_IF([test "$pmix_check_compiler_vendor_result" = "unknown"],
+          [PMIX_IF_IFELSE([defined(SASC) || defined(__SASC) || defined(__SASC__)],
+               [pmix_check_compiler_vendor_result="sas"])])
+
+    # Sun Workshop C/C++
+    AS_IF([test "$pmix_check_compiler_vendor_result" = "unknown"],
+          [PMIX_IF_IFELSE([defined(__SUNPRO_C) || defined(__SUNPRO_CC)],
+               [pmix_check_compiler_vendor_result="sun"])])
+
+    # TenDRA C/C++
+    AS_IF([test "$pmix_check_compiler_vendor_result" = "unknown"],
+          [PMIX_IFDEF_IFELSE([__TenDRA__],
+               [pmix_check_compiler_vendor_result="tendra"])])
+
+    # Tiny C
+    AS_IF([test "$pmix_check_compiler_vendor_result" = "unknown"],
+          [PMIX_IFDEF_IFELSE([__TINYC__],
+               [pmix_check_compiler_vendor_result="tiny"])])
+
+    # USL C
+    AS_IF([test "$pmix_check_compiler_vendor_result" = "unknown"],
+          [PMIX_IFDEF_IFELSE([__USLC__],
+               [pmix_check_compiler_vendor_result="usl"])])
+
+    # Watcom C++
+    AS_IF([test "$pmix_check_compiler_vendor_result" = "unknown"],
+          [PMIX_IFDEF_IFELSE([__WATCOMC__],
+               [pmix_check_compiler_vendor_result="watcom"])])
+
+    $1="$pmix_check_compiler_vendor_result"
+    unset pmix_check_compiler_vendor_result
+])
diff --git a/opal/mca/pmix/pmix1xx/pmix/config/pmix_check_visibility.m4
b/opal/mca/pmix/pmix1xx/pmix/config/pmix_check_visibility.m4
new file mode 100644
index 0000000000..5368ead9bf
--- /dev/null
+++ b/opal/mca/pmix/pmix1xx/pmix/config/pmix_check_visibility.m4
@@ -0,0 +1,101 @@
+# -*- shell-script -*-
+#
+# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+#                         University Research and Technology
+#                         Corporation. All rights reserved.
+# Copyright (c) 2004-2005 The University of Tennessee and The University
+#                         of Tennessee Research Foundation. All rights
+#                         reserved.
+# Copyright (c) 2004-2007 High Performance Computing Center Stuttgart,
+#                         University of Stuttgart. All rights reserved.
+# Copyright (c) 2004-2005 The Regents of the University of California.
+#                         All rights reserved.
+# Copyright (c) 2006-2015 Cisco Systems, Inc. All rights reserved.
+# Copyright (c) 2009-2011 Oracle and/or its affiliates. All rights reserved.
+# $COPYRIGHT$
+#
+# Additional copyrights may follow
+#
+# $HEADER$
+#
+
+# PMIX_CHECK_VISIBILITY
+# --------------------------------------------------------
+# Decide whether symbols can be hidden by default.  Tries -xldscope=hidden
+# when pmix_c_vendor is "sun" and -fvisibility=hidden otherwise, linking a
+# small test program with warnings promoted to errors.
+#
+# Honors --enable-visibility / --disable-visibility.
+# Sets PMIX_VISIBILITY_CFLAGS to the accepted flag (empty if rejected; left
+# untouched when visibility is disabled) and defines PMIX_C_HAVE_VISIBILITY
+# to 1 or 0.  Aborts if visibility was explicitly requested but the
+# compiler does not support it.
+AC_DEFUN([PMIX_CHECK_VISIBILITY],[
+    AC_REQUIRE([AC_PROG_GREP])
+
+    # Check if the compiler has support for visibility, like some
+    # versions of gcc, icc, and Sun Studio cc.
+    AC_ARG_ENABLE(visibility,
+        AS_HELP_STRING([--enable-visibility],
+            [enable visibility feature of certain compilers/linkers (default: enabled)]))
+
+    pmix_visibility_define=0
+    pmix_msg="whether to enable symbol visibility"
+
+    if test "$enable_visibility" = "no"; then
+        AC_MSG_CHECKING([$pmix_msg])
+        AC_MSG_RESULT([no (disabled)])
+    else
+        CFLAGS_orig=$CFLAGS
+
+        pmix_add=
+        case "$pmix_c_vendor" in
+        sun)
+            # Check using Sun Studio -xldscope=hidden flag
+            pmix_add=-xldscope=hidden
+            CFLAGS="$PMIX_CFLAGS_BEFORE_PICKY $pmix_add -errwarn=%all"
+            ;;
+
+        *)
+            # Check using -fvisibility=hidden
+            pmix_add=-fvisibility=hidden
+            CFLAGS="$PMIX_CFLAGS_BEFORE_PICKY $pmix_add -Werror"
+            ;;
+        esac
+
+        AC_MSG_CHECKING([if $CC supports $pmix_add])
+        # The test program calls fprintf, so stdio.h must be included;
+        # without it the probe fails for reasons unrelated to visibility.
+        AC_LINK_IFELSE([AC_LANG_PROGRAM([[
+            #include <stdio.h>
+            __attribute__((visibility("default"))) int foo;
+            ]],[[fprintf(stderr, "Hello, world\n");]])],
+            [AS_IF([test -s conftest.err],
+                   [$GREP -iq visibility conftest.err
+                    # If we find "visibility" in the stderr, then
+                    # assume it doesn't work
+                    AS_IF([test "$?" = "0"], [pmix_add=])])
+            ], [pmix_add=])
+        AS_IF([test "$pmix_add" = ""],
+              [AC_MSG_RESULT([no])],
+              [AC_MSG_RESULT([yes])])
+
+        CFLAGS=$CFLAGS_orig
+        PMIX_VISIBILITY_CFLAGS=$pmix_add
+
+        if test "$pmix_add" != "" ; then
+            pmix_visibility_define=1
+            AC_MSG_CHECKING([$pmix_msg])
+            AC_MSG_RESULT([yes (via $pmix_add)])
+        elif test "$enable_visibility" = "yes"; then
+            AC_MSG_ERROR([Symbol visibility support requested but compiler does not seem to support it. Aborting])
+        else
+            AC_MSG_CHECKING([$pmix_msg])
+            AC_MSG_RESULT([no (unsupported)])
+        fi
+        unset pmix_add
+    fi
+
+    AC_DEFINE_UNQUOTED([PMIX_C_HAVE_VISIBILITY], [$pmix_visibility_define],
+            [Whether C compiler supports symbol visibility or not])
+])
diff --git a/opal/mca/pmix/pmix1xx/pmix/config/pmix_ensure_contains_optflags.m4 b/opal/mca/pmix/pmix1xx/pmix/config/pmix_ensure_contains_optflags.m4
new file mode 100644
index 0000000000..9e228c3980
--- /dev/null
+++ b/opal/mca/pmix/pmix1xx/pmix/config/pmix_ensure_contains_optflags.m4
@@ -0,0 +1,78 @@
+dnl -*- shell-script -*-
+dnl
+dnl PMIx copyrights:
+dnl Copyright (c) 2013 Intel, Inc. All rights reserved
+dnl
+dnl########################
+dnl This code has been adapted from pmix_ensure_contains_optflags.m4 in the Open MPI
+dnl code base - per the Open MPI license, all copyrights are retained below.
+dnl
+dnl Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+dnl                         University Research and Technology
+dnl                         Corporation. All rights reserved.
+dnl Copyright (c) 2004-2005 The University of Tennessee and The University
+dnl                         of Tennessee Research Foundation. All rights
+dnl                         reserved.
+dnl Copyright (c) 2004-2007 High Performance Computing Center Stuttgart,
+dnl                         University of Stuttgart. All rights reserved.
+dnl Copyright (c) 2004-2005 The Regents of the University of California.
+dnl                         All rights reserved.
+dnl Copyright (c) 2007-2009 Sun Microsystems, Inc. All rights reserved.
+dnl Copyright (c) 2008-2015 Cisco Systems, Inc. All rights reserved.
+dnl########################
+dnl $COPYRIGHT$
+dnl
+dnl Additional copyrights may follow
+dnl
+dnl $HEADER$
+dnl
+dnl PMIX_ENSURE_CONTAINS_OPTFLAGS(flags)
+dnl Leaves the result in the shell variable co_result: the given flags,
+dnl preceded by the value of OPTFLAGS when none of the words matches one
+dnl of the debugging/optimization flag patterns listed below.
+
+AC_DEFUN([PMIX_ENSURE_CONTAINS_OPTFLAGS],[
+
+# Modularize this setup so that sub-configure.in scripts can use this
+# same setup code.
+
+##################################
+# Optimization flags
+##################################
+
+# If the user did not specify optimization flags, add some (the value
+# from $OPTFLAGS)
+
+co_arg="$1"
+co_found=0
+for co_word in $co_arg; do
+    # See http://www.gnu.org/software/autoconf/manual/html_node/Quadrigraphs.html#Quadrigraphs
+    # for an explanation of @<:@ and @:>@ -- they m4 expand to [ and ]
+    case $co_word in
+    -g) co_found=1 ;;
+    -g@<:@1-3@:>@) co_found=1 ;;
+    +K@<:@0-5@:>@) co_found=1 ;;
+    -O) co_found=1 ;;
+    -O@<:@0-9@:>@) co_found=1 ;;
+    -xO) co_found=1 ;;
+    -xO@<:@0-9@:>@) co_found=1 ;;
+    -fast) co_found=1 ;;
+
+    # The below Sun Studio flags require or
+    # trigger -xO optimization
+    -xvector*) co_found=1 ;;
+    -xdepend=yes) co_found=1 ;;
+
+    esac
+done
+
+if test "$co_found" = "0"; then
+    co_result="$OPTFLAGS $co_arg"
+else
+    co_result="$co_arg"
+fi
+
+# Clean up
+
+unset co_found co_word co_arg
+])
diff --git a/opal/mca/pmix/pmix1xx/pmix/config/pmix_functions.m4 b/opal/mca/pmix/pmix1xx/pmix/config/pmix_functions.m4
new file mode 100644
index 0000000000..3ed6d7e4e1
--- /dev/null
+++ b/opal/mca/pmix/pmix1xx/pmix/config/pmix_functions.m4
@@ -0,0 +1,540 @@
+dnl -*- shell-script -*-
+dnl
+dnl PMIx copyrights:
+dnl Copyright (c) 2013 Intel, Inc. All rights reserved
+dnl
+dnl########################
+dnl This code has been adapted from pmix_get_version.m4sh in the Open MPI
+dnl code base - per the Open MPI license, all copyrights are retained below.
+dnl
+dnl Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
+dnl                         University Research and Technology
+dnl                         Corporation. All rights reserved.
+dnl Copyright (c) 2004-2005 The University of Tennessee and The University
+dnl                         of Tennessee Research Foundation. All rights
+dnl                         reserved.
+dnl Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+dnl                         University of Stuttgart. All rights reserved.
+dnl Copyright (c) 2004-2005 The Regents of the University of California.
+dnl                         All rights reserved.
+dnl Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved.
+dnl Copyright (c) 2009 Oak Ridge National Labs. All rights reserved.
+dnl Copyright (c) 2009-2015 Cisco Systems, Inc. All rights reserved.
+dnl########################
+dnl
+dnl $COPYRIGHT$
+dnl
+dnl Additional copyrights may follow
+dnl
+dnl $HEADER$
+dnl
+dnl Portions of this file derived from GASNet v1.12 (see "GASNet"
+dnl comments, below)
+dnl Copyright 2004, Dan Bonachea
+dnl
+dnl IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR
+dnl DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT
+dnl OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF
+dnl CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+dnl
+dnl THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES,
+dnl INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+dnl AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+dnl ON AN "AS IS" BASIS, AND THE UNIVERSITY OF CALIFORNIA HAS NO OBLIGATION TO
+dnl PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+dnl
+
+AC_DEFUN([PMIX_CONFIGURE_SETUP],[
+
+# Some helper script functions. Unfortunately, we cannot use $1 kinds
+# of arguments here because of the m4 substitution. So we have to set
+# special variable names before invoking the function. :-\
+
+# NOTE(review): this hunk looks damaged.  Everything that should sit
+# between the start of the here-document below and the final arms of a
+# case statement on the prefix value is missing, so the text as it
+# stands is not valid shell -- confirm against the upstream file.
+pmix_show_title() {
+  cat <@:*)
+    echo installing to directory \"$prefix\"
+    ;;
+  *)
+    AC_MSG_ERROR(prefix "$prefix" must be an absolute directory path)
+    ;;
+esac
+
+# BEGIN: Derived from GASNet
+
+# Suggestion from Paul Hargrove to disable --program-prefix and
+# friends. Heavily influenced by GASNet 1.12 acinclude.m4
+# functionality to do the same thing (copyright listed at top of this
+# file).
+
+# echo program_prefix=$program_prefix program_suffix=$program_suffix program_transform_name=$program_transform_name
+# undo prefix autoconf automatically adds during cross-compilation
+if test "$cross_compiling" = yes && test "$program_prefix" = "${target_alias}-" ; then
+    program_prefix=NONE
+fi
+# normalize empty prefix/suffix
+if test -z "$program_prefix" ; then
+    program_prefix=NONE
+fi
+if test -z "$program_suffix" ; then
+    program_suffix=NONE
+fi
+# undo transforms caused by empty prefix/suffix
+if expr "$program_transform_name" : 's.^..$' >/dev/null || \
+   expr "$program_transform_name" : 's.$$..$' >/dev/null || \
+   expr "$program_transform_name" : 's.$$..;s.^..$' >/dev/null ; then
+    program_transform_name="s,x,x,"
+fi
+# Any remaining non-default prefix, suffix or transform is rejected outright.
+if test "$program_prefix$program_suffix$program_transform_name" != "NONENONEs,x,x," ; then
+    AC_MSG_WARN([*** The Open MPI configure script does not support --program-prefix, --program-suffix or --program-transform-name. Users are recommended to instead use --prefix with a unique directory and make symbolic links as desired for renaming.])
+    AC_MSG_ERROR([*** Cannot continue])
+fi
+
+# END: Derived from GASNet
+])dnl
+
+dnl #######################################################################
+dnl #######################################################################
+dnl #######################################################################
+
+dnl PMIX_LOG_MSG(message, [prefix])
+dnl Echo the message to file descriptor 5.  A non-empty second argument
+dnl adds the "configure:__oline__: " prefix.
+AC_DEFUN([PMIX_LOG_MSG],[
+# 1 is the message
+# 2 is whether to put a prefix or not
+if test -n "$2"; then
+    echo "configure:__oline__: $1" >&5
+else
+    echo $1 >&5
+fi])dnl
+
+dnl #######################################################################
+dnl #######################################################################
+dnl #######################################################################
+
+dnl PMIX_LOG_FILE(filename)
+dnl Copy the named file to file descriptor 5 if the name is non-empty
+dnl and refers to a regular file.
+AC_DEFUN([PMIX_LOG_FILE],[
+# 1 is the filename
+if test -n "$1" -a -f "$1"; then
+    cat $1 >&5
+fi])dnl
+
+dnl #######################################################################
+dnl
#######################################################################
+dnl #######################################################################
+
+dnl PMIX_LOG_COMMAND(command, [action-if-success], [action-if-fail])
+dnl Log the command line to file descriptor 5, run it with stdout and
+dnl stderr sent there too, log its exit status, then run the matching action.
+AC_DEFUN([PMIX_LOG_COMMAND],[
+# 1 is the command
+# 2 is actions to do if success
+# 3 is actions to do if fail
+echo "configure:__oline__: $1" >&5
+$1 1>&5 2>&1
+pmix_status=$?
+PMIX_LOG_MSG([\$? = $pmix_status], 1)
+if test "$pmix_status" = "0"; then
+    unset pmix_status
+    $2
+else
+    unset pmix_status
+    $3
+fi])dnl
+
+dnl #######################################################################
+dnl #######################################################################
+dnl #######################################################################
+
+dnl PMIX_UNIQ(variable-name)
+dnl Remove duplicate words from the named shell variable, keeping the
+dnl first occurrence of each (the word -Xclang is always kept).
+dnl NOTE(review): the loop variable "val" and "pmix_found" are not in the
+dnl final unset list, so they remain set after the macro runs.
+AC_DEFUN([PMIX_UNIQ],[
+# 1 is the variable name to be uniq-ized
+pmix_name=$1
+
+# Go through each item in the variable and only keep the unique ones
+
+pmix_count=0
+for val in ${$1}; do
+    pmix_done=0
+    pmix_i=1
+    pmix_found=0
+
+    # Loop over every token we've seen so far
+
+    pmix_done="`expr $pmix_i \> $pmix_count`"
+    while test "$pmix_found" = "0" -a "$pmix_done" = "0"; do
+
+        # Have we seen this token already? Prefix the comparison with
+        # "x" so that "-Lfoo" values won't cause an error.
+
+        pmix_eval="expr x$val = x\$pmix_array_$pmix_i"
+        pmix_found=`eval $pmix_eval`
+
+        # Check the ending condition
+
+        pmix_done="`expr $pmix_i \>= $pmix_count`"
+
+        # Increment the counter
+
+        pmix_i="`expr $pmix_i + 1`"
+    done
+
+    # Check for special cases where we do want to allow repeated
+    # arguments (per
+    # http://www.open-mpi.org/community/lists/devel/2012/08/11362.php).
+
+    case $val in
+    -Xclang)
+        pmix_found=0
+        pmix_i=`expr $pmix_count + 1`
+        ;;
+    esac
+
+    # If we didn't find the token, add it to the "array"
+
+    if test "$pmix_found" = "0"; then
+        pmix_eval="pmix_array_$pmix_i=$val"
+        eval $pmix_eval
+        pmix_count="`expr $pmix_count + 1`"
+    else
+        pmix_i="`expr $pmix_i - 1`"
+    fi
+done
+
+# Take all the items in the "array" and assemble them back into a
+# single variable
+
+pmix_i=1
+pmix_done="`expr $pmix_i \> $pmix_count`"
+pmix_newval=
+while test "$pmix_done" = "0"; do
+    pmix_eval="pmix_newval=\"$pmix_newval \$pmix_array_$pmix_i\""
+    eval $pmix_eval
+
+    pmix_eval="unset pmix_array_$pmix_i"
+    eval $pmix_eval
+
+    pmix_done="`expr $pmix_i \>= $pmix_count`"
+    pmix_i="`expr $pmix_i + 1`"
+done
+
+# Done; do the assignment
+
+# The echo collapses the leading space left by the assembly loop above.
+pmix_newval="`echo $pmix_newval`"
+pmix_eval="$pmix_name=\"$pmix_newval\""
+eval $pmix_eval
+
+# Clean up
+
+unset pmix_name pmix_i pmix_done pmix_newval pmix_eval pmix_count])dnl
+
+dnl #######################################################################
+dnl #######################################################################
+dnl #######################################################################
+
+# PMIX_APPEND_UNIQ(variable, new_argument)
+# ----------------------------------------
+# Append new_argument to variable if not already in variable. This assumes a
+# space separated list.
+#
+# This could probably be made more efficient :(.
+AC_DEFUN([PMIX_APPEND_UNIQ], [
+# 1 is the name of the space separated list variable
+# 2 is one or more words to append when not already present
+for arg in $2; do
+    pmix_found=0;
+    for val in ${$1}; do
+        if test "x$val" = "x$arg" ; then
+            pmix_found=1
+            break
+        fi
+    done
+    if test "$pmix_found" = "0" ; then
+        if test -z "$$1"; then
+            $1="$arg"
+        else
+            $1="$$1 $arg"
+        fi
+    fi
+done
+unset pmix_found
+])
+
+dnl #######################################################################
+dnl #######################################################################
+dnl #######################################################################
+
+# Macro that serves as an alternative to using `which <foo>`. It is
+# preferable to simply using `which <foo>` because backticks (`) (aka
+# backquotes) invoke a sub-shell which may source a "noisy"
+# ~/.whatever file (and we do not want the error messages to be part
+# of the assignment in foo=`which <foo>`). This macro ensures that we
+# get a sane executable value.
+AC_DEFUN([PMIX_WHICH],[
+# 1 is the name of the program to look for in PATH
+# 2 is the variable name to assign the return value to (left untouched
+#   when no executable of that name is found)
+
+PMIX_VAR_SCOPE_PUSH([pmix_prog pmix_file pmix_dir pmix_sentinel])
+
+pmix_prog=$1
+
+# Split PATH on the platform separator only while scanning it
+IFS_SAVE=$IFS
+IFS="$PATH_SEPARATOR"
+for pmix_dir in $PATH; do
+    if test -x "$pmix_dir/$pmix_prog"; then
+        $2="$pmix_dir/$pmix_prog"
+        break
+    fi
+done
+IFS=$IFS_SAVE
+
+PMIX_VAR_SCOPE_POP
+])dnl
+
+dnl #######################################################################
+dnl #######################################################################
+dnl #######################################################################
+
+# Declare some variables; use PMIX_VAR_SCOPE_POP to ensure that they
+# are cleaned up / undefined.
+AC_DEFUN([PMIX_VAR_SCOPE_PUSH],[
+
+    # Is the private index set? If not, set it.
+    if test "x$pmix_scope_index" = "x"; then
+        pmix_scope_index=1
+    fi
+
+    # First, check to see if any of these variables are already set.
+    # This is a simple sanity check to ensure we're not already
+    # overwriting pre-existing variables (that have a non-empty
+    # value). It's not a perfect check, but at least it's something.
+    for pmix_var in $1; do
+        pmix_str="pmix_str=\"\$$pmix_var\""
+        eval $pmix_str
+
+        if test "x$pmix_str" != "x"; then
+            AC_MSG_WARN([Found configure shell variable clash!])
+            AC_MSG_WARN([[PMIX_VAR_SCOPE_PUSH] called on "$pmix_var",])
+            AC_MSG_WARN([but it is already defined with value "$pmix_str"])
+            AC_MSG_WARN([This usually indicates an error in configure.])
+            AC_MSG_ERROR([Cannot continue])
+        fi
+    done
+
+    # Ok, we passed the simple sanity check. Save all these names so
+    # that we can unset them at the end of the scope.
+    pmix_str="pmix_scope_$pmix_scope_index=\"$1\""
+    eval $pmix_str
+    unset pmix_str
+
+    pmix_scope_index=`expr $pmix_scope_index + 1`
+])dnl
+
+# Unset a bunch of variables that were previously set
+AC_DEFUN([PMIX_VAR_SCOPE_POP],[
+    # Unwind the index
+    pmix_scope_index=`expr $pmix_scope_index - 1`
+    pmix_scope_test=`expr $pmix_scope_index \> 0`
+    if test "$pmix_scope_test" = "0"; then
+        AC_MSG_WARN([[PMIX_VAR_SCOPE_POP] popped too many PMIX configure scopes.])
+        AC_MSG_WARN([This usually indicates an error in configure.])
+        AC_MSG_ERROR([Cannot continue])
+    fi
+
+    # Get the variable names from that index
+    pmix_str="pmix_str=\"\$pmix_scope_$pmix_scope_index\""
+    eval $pmix_str
+
+    # Iterate over all the variables and unset them all
+    for pmix_var in $pmix_str; do
+        unset $pmix_var
+    done
+])dnl
+
+
+dnl #######################################################################
+dnl #######################################################################
+dnl #######################################################################
+
+#
+# PMIX_WITH_OPTION_MIN_MAX_VALUE(NAME,DEFAULT_VALUE,LOWER_BOUND,UPPER_BOUND)
+# Defines a variable PMIX_MAX_xxx, with "xxx" being specified as parameter $1 as "variable_name".
+# If not set at configure-time using --with-max-xxx, the default-value ($2) is assumed.
+# If set, value is checked against lower (value >= $3) and upper bound (value <= $4)
+# NOTE(review): max_value and happy are plain shell variables and stay set
+# after the macro has run.
+#
+AC_DEFUN([PMIX_WITH_OPTION_MIN_MAX_VALUE], [
+    max_value=[$2]
+    AC_MSG_CHECKING([maximum length of ]m4_translit($1, [_], [ ]))
+    AC_ARG_WITH([max-]m4_translit($1, [_], [-]),
+        AC_HELP_STRING([--with-max-]m4_translit($1, [_], [-])[=VALUE],
+                       [maximum length of ]m4_translit($1, [_], [ ])[s. VALUE argument has to be specified (default: [$2]).]))
+    if test ! -z "$with_max_[$1]" -a "$with_max_[$1]" != "no" ; then
+        # Ensure it's a number (hopefully an integer!), and >0
+        expr $with_max_[$1] + 1 > /dev/null 2> /dev/null
+        AS_IF([test "$?" != "0"], [happy=0],
+              [AS_IF([test $with_max_[$1] -ge $3 -a $with_max_[$1] -le $4],
+                     [happy=1], [happy=0])])
+
+        # If badness in the above tests, bail
+        AS_IF([test "$happy" = "0"],
+              [AC_MSG_RESULT([bad value ($with_max_[$1])])
+               AC_MSG_WARN([--with-max-]m4_translit($1, [_], [-])[s value must be >= $3 and <= $4])
+               AC_MSG_ERROR([Cannot continue])])
+        max_value=$with_max_[$1]
+    fi
+    AC_MSG_RESULT([$max_value])
+    AC_DEFINE_UNQUOTED([PMIX_MAX_]m4_toupper($1), $max_value,
+                       [Maximum length of ]m4_translit($1, [_], [ ])[s (default is $2)])
+    [PMIX_MAX_]m4_toupper($1)=$max_value
+    AC_SUBST([PMIX_MAX_]m4_toupper($1))
+])dnl
+
+dnl #######################################################################
+dnl #######################################################################
+dnl #######################################################################
+
+# Usage: PMIX_COMPUTE_MAX_VALUE(number_bytes, variable_to_set, action if overflow)
+# Compute maximum value of datatype of
+# number_bytes, setting the result in the second argument. Assumes a
+# signed datatype.
+# NOTE(review): the temporaries pmix_num_bits, newval, value and overflow
+# are not unset at the end of the macro.
+AC_DEFUN([PMIX_COMPUTE_MAX_VALUE], [
+    # This is more complicated than it really should be. But some
+    # expr implementations (OpenBSD) have an expr with a max value of
+    # 2^31 - 1, and we sometimes want to compute the max value of a
+    # type as big or bigger than that...
+    pmix_num_bits=`expr $1 \* 8 - 1`
+    newval=1
+    value=1
+    overflow=0
+
+    while test $pmix_num_bits -ne 0 ; do
+        newval=`expr $value \* 2`
+        if test 0 -eq `expr $newval \< 0` ; then
+            # if the new value is not negative, next iteration...
+            value=$newval
+            pmix_num_bits=`expr $pmix_num_bits - 1`
+            # if this was the last iteration, subtract 1 (as signed
+            # max positive is 2^num_bits - 1). Do this here instead
+            # of outside of the while loop because we might have
+            # already subtracted 1 by then if we're trying to find the
+            # max value of the same datatype expr uses as its
+            # internal representation (ie, if we hit the else
+            # below...)
+            if test 0 -eq $pmix_num_bits ; then
+                value=`expr $value - 1`
+            fi
+        else
+            # if the new value is negative, we've overflowed. First,
+            # try adding value - 1 instead of value (see if we can get
+            # to positive max of expr)
+            newval=`expr $value - 1 + $value`
+            if test 0 -eq `expr $newval \< 0` ; then
+                value=$newval
+                # Still positive, this is as high as we can go. If
+                # pmix_num_bits is 1, we didn't actually overflow.
+                # Otherwise, we overflowed.
+                if test 1 -ne $pmix_num_bits ; then
+                    overflow=1
+                fi
+            else
+                # still negative. Time to give up.
+                overflow=1
+            fi
+            # Either way the loop ends here
+            pmix_num_bits=0
+        fi
+    done
+
+    AS_VAR_SET([$2], [$value])
+    AS_IF([test $overflow -ne 0], [$3])
+])dnl
diff --git a/opal/mca/pmix/pmix1xx/pmix/config/pmix_get_version.sh b/opal/mca/pmix/pmix1xx/pmix/config/pmix_get_version.sh
new file mode 100755
index 0000000000..6106af60c3
--- /dev/null
+++ b/opal/mca/pmix/pmix1xx/pmix/config/pmix_get_version.sh
@@ -0,0 +1,161 @@
+#!/bin/sh
+#
+# Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
+#                         University Research and Technology
+#                         Corporation. All rights reserved.
+# Copyright (c) 2004-2005 The University of Tennessee and The University
+#                         of Tennessee Research Foundation. All rights
+#                         reserved.
+# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
+#                         University of Stuttgart.
All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2008-2015 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2015 Intel, Inc. All rights reserved +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + + + +# PMIX_GET_VERSION(version_file, variable_prefix) +# ----------------------------------------------- +# parse version_file for version information, setting +# the following shell variables: +# +# prefix_VERSION +# prefix_BASE_VERSION +# prefix_MAJOR_VERSION +# prefix_MINOR_VERSION +# prefix_RELEASE_VERSION +# prefix_GREEK_VERSION +# prefix_REPO_REV +# prefix_TARBALL_VERSION +# prefix_RELEASE_DATE + + + +srcfile="$1" +option="$2" + +if test -z "$srcfile"; then + option="--help" +else + + if test -f "$srcfile"; then + srcdir=`dirname $srcfile` + pmix_vers=`sed -n " + t clear + : clear + s/^major/PMIX_MAJOR_VERSION/ + s/^minor/PMIX_MINOR_VERSION/ + s/^release/PMIX_RELEASE_VERSION/ + s/^greek/PMIX_GREEK_VERSION/ + s/^repo_rev/PMIX_REPO_REV/ + s/^tarball_version/PMIX_TARBALL_VERSION/ + s/^date/PMIX_RELEASE_DATE/ + t print + b + : print + p" < "$srcfile"` + eval "$pmix_vers" + + PMIX_VERSION="$PMIX_MAJOR_VERSION.$PMIX_MINOR_VERSION.$PMIX_RELEASE_VERSION" + PMIX_VERSION="${PMIX_VERSION}${PMIX_GREEK_VERSION}" + + if test "$PMIX_TARBALL_VERSION" = ""; then + PMIX_TARBALL_VERSION=$PMIX_VERSION + fi + + # If repo_rev was not set in the VERSION file, then get it now + if test "$PMIX_REPO_REV" = ""; then + # See if we can find the "git" command. + git_happy=0 + git --version > /dev/null 2>&1 + if test $? 
-eq 0; then + git_happy=1 + fi + + # If we're in a git repo and we found the git command, use + # git describe to get the repo rev + if test -d "$srcdir/.git" && test $git_happy -eq 1; then + if test "$srcdir" != "`pwd`"; then + git_save_dir=`pwd` + cd $srcdir + PMIX_REPO_REV=`git describe --tags --always` + cd $git_save_dir + unset git_save_dir + else + PMIX_REPO_REV=`git describe --tags --always` + fi + else + PMIX_REPO_REV="date`date '+%Y-%m-%d'`" + fi + fi + + + fi + + + if test "$option" = ""; then + option="--full" + fi +fi + +case "$option" in + --full|-v|--version) + echo $PMIX_VERSION + ;; + --major) + echo $PMIX_MAJOR_VERSION + ;; + --minor) + echo $PMIX_MINOR_VERSION + ;; + --release) + echo $PMIX_RELEASE_VERSION + ;; + --greek) + echo $PMIX_GREEK_VERSION + ;; + --repo-rev) + echo $PMIX_REPO_REV + ;; + --tarball) + echo $PMIX_TARBALL_VERSION + ;; + --release-date) + echo $PMIX_RELEASE_DATE + ;; + --all) + echo ${PMIX_VERSION} : ${PMIX_MAJOR_VERSION} : ${PMIX_MINOR_VERSION} : ${PMIX_RELEASE_VERSION} : ${PMIX_GREEK_VERSION} : ${PMIX_REPO_REV} : ${PMIX_TARBALL_VERSION} + ;; + -h|--help) + cat <