From 5b9fa7e998b61a27846c3ceeca516df6b11facd5 Mon Sep 17 00:00:00 2001 From: Brian Barrett Date: Fri, 20 Jul 2007 02:34:29 +0000 Subject: [PATCH] reapply r15517 and r15520, which were removed in r15527 so that I could get the RML/OOB merge in slightly easier This commit was SVN r15530. The following SVN revision numbers were found above: r15517 --> open-mpi/ompi@41977fcc95e1a1b5658e3ca7a43fa4fadb801dcc r15520 --> open-mpi/ompi@9cbc9df1b8c1f95df52d1c8e7ba4cf1c2605e4bc r15527 --> open-mpi/ompi@2d17dd95169a96febbf38eef48912bf38fc65aa9 --- configure.ac | 1 - ompi/communicator/comm.c | 2 +- ompi/mca/btl/base/btl_base_error.c | 4 +- ompi/mca/btl/base/btl_base_error.h | 21 +- ompi/mca/btl/gm/btl_gm_component.c | 4 +- ompi/mca/btl/gm/btl_gm_proc.c | 12 +- ompi/mca/btl/mvapi/btl_mvapi.c | 4 +- ompi/mca/btl/mvapi/btl_mvapi_proc.c | 8 +- ompi/mca/btl/mx/btl_mx_proc.c | 8 +- ompi/mca/btl/ofud/btl_ofud_proc.c | 8 +- ompi/mca/btl/openib/btl_openib_proc.c | 8 +- ompi/mca/btl/tcp/btl_tcp_endpoint.c | 4 +- ompi/mca/btl/udapl/btl_udapl_proc.c | 8 +- ompi/mca/crcp/coord/crcp_coord_pml.c | 315 +++--- ompi/mca/mpool/rdma/mpool_rdma_module.c | 4 +- ompi/mca/pml/base/pml_base_select.c | 12 +- ompi/proc/proc.c | 6 +- ompi/runtime/ompi_mpi_init.c | 4 +- orte/class/orte_proc_table.c | 2 +- orte/include/orte/orte_types.h | 68 +- orte/mca/errmgr/base/errmgr_base_fns.c | 11 +- .../mca/errmgr/bproc/errmgr_bproc_component.c | 4 +- orte/mca/errmgr/hnp/errmgr_hnp_component.c | 4 +- .../mca/errmgr/orted/errmgr_orted_component.c | 4 +- .../mca/errmgr/proxy/errmgr_proxy_component.c | 4 +- orte/mca/filem/base/filem_base_fns.c | 9 +- orte/mca/filem/rsh/filem_rsh_module.c | 6 +- orte/mca/gpr/proxy/gpr_proxy_component.c | 8 +- orte/mca/gpr/proxy/gpr_proxy_compound_cmd.c | 2 +- .../proxy/gpr_proxy_dump_local_trigs_subs.c | 4 +- .../replica/api_layer/gpr_replica_dump_api.c | 16 +- .../gpr_replica_dump_local_trigs_subs_api.c | 8 +- .../gpr_replica_recv_proxy_msgs.c | 4 +- 
.../communications/gpr_replica_remote_msg.c | 2 +- .../functional_layer/gpr_replica_cleanup_fn.c | 4 +- .../functional_layer/gpr_replica_dump_fn.c | 16 +- .../functional_layer/gpr_replica_put_get_fn.c | 18 +- .../gpr_replica_subscribe_fn.c | 4 +- orte/mca/grpcomm/basic/grpcomm_basic_module.c | 21 +- orte/mca/iof/README.txt | 3 +- orte/mca/iof/proxy/iof_proxy.c | 2 +- orte/mca/iof/svc/iof_svc_component.c | 4 +- orte/mca/iof/svc/iof_svc_proxy.c | 20 +- orte/mca/iof/svc/iof_svc_pub.c | 4 +- orte/mca/iof/svc/iof_svc_sub.c | 24 +- orte/mca/ns/base/Makefile.am | 3 +- orte/mca/ns/base/base.h | 1 + .../ns_data_type_compare_fns.c | 27 - .../data_type_support/ns_data_type_copy_fns.c | 20 - .../ns_data_type_packing_fns.c | 38 - .../ns_data_type_print_fns.c | 8 +- .../data_type_support/ns_data_type_size_fns.c | 4 - .../ns_data_type_unpacking_fns.c | 41 +- orte/mca/ns/base/ns_base_diag_fns.c | 7 - ...{ns_base_cell_fns.c => ns_base_node_fns.c} | 112 +- orte/mca/ns/base/ns_base_open.c | 35 +- orte/mca/ns/base/ns_base_print_name_args.c | 82 ++ orte/mca/ns/base/ns_base_vpid_name_fns.c | 79 +- orte/mca/ns/base/ns_private.h | 38 +- orte/mca/ns/ns.h | 148 +-- orte/mca/ns/ns_types.h | 17 +- orte/mca/ns/proxy/ns_proxy.h | 13 +- orte/mca/ns/proxy/ns_proxy_cell_fns.c | 197 +--- orte/mca/ns/proxy/ns_proxy_component.c | 18 - orte/mca/ns/proxy/ns_proxy_diag_fns.c | 68 +- orte/mca/ns/proxy/ns_proxy_general_fns.c | 29 +- orte/mca/ns/replica/ns_replica.c | 452 -------- orte/mca/ns/replica/ns_replica.h | 44 +- orte/mca/ns/replica/ns_replica_cell_fns.c | 243 +--- .../ns/replica/ns_replica_class_instances.h | 61 - orte/mca/ns/replica/ns_replica_component.c | 26 +- orte/mca/ns/replica/ns_replica_diag_fns.c | 66 -- orte/mca/ns/replica/ns_replica_general_fns.c | 32 +- orte/mca/ns/replica/ns_replica_recv.c | 79 +- orte/mca/odls/bproc/odls_bproc.c | 4 +- orte/mca/odls/default/odls_default_module.c | 45 +- orte/mca/odls/process/odls_process_module.c | 45 +- orte/mca/oob/tcp/oob_tcp.c | 71 +- 
orte/mca/oob/tcp/oob_tcp_msg.c | 38 +- orte/mca/oob/tcp/oob_tcp_peer.c | 150 +-- orte/mca/oob/tcp/oob_tcp_ping.c | 36 +- orte/mca/oob/tcp/oob_tcp_send.c | 6 +- orte/mca/pls/base/pls_base_dmn_registry_fns.c | 376 ------ orte/mca/pls/bproc/pls_bproc.c | 29 +- orte/mca/pls/bproc/pls_bproc.h | 2 +- orte/mca/pls/bproc/pls_bproc_state.c | 4 +- orte/mca/pls/lsf/pls_lsf_module.c | 1 - orte/mca/pls/poe/pls_poe_module.c | 2 +- orte/mca/pls/slurm/pls_slurm_module.c | 1 - orte/mca/pls/xcpu/pls_xcpu.c | 6 +- .../ras_data_type_compare_fns.c | 5 +- .../ras_data_type_copy_fns.c | 1 - .../ras_data_type_packing_fns.c | 7 - .../ras_data_type_print_fns.c | 4 +- .../ras_data_type_unpacking_fns.c | 8 - orte/mca/ras/base/ras_base_no_ops.c | 2 +- orte/mca/ras/base/ras_base_node.c | 67 +- orte/mca/ras/base/ras_private.h | 6 +- orte/mca/ras/dash_host/ras_dash_host_module.c | 3 - .../ras/gridengine/ras_gridengine_module.c | 5 +- .../ras/loadleveler/ras_loadleveler_module.c | 1 - orte/mca/ras/localhost/ras_localhost_module.c | 3 - orte/mca/ras/ras.h | 5 +- orte/mca/ras/ras_types.h | 2 - orte/mca/ras/slurm/ras_slurm_module.c | 3 - orte/mca/ras/tm/ras_tm_module.c | 1 - orte/mca/ras/xgrid/src/ras_xgrid_module.c | 1 - orte/mca/rds/base/rds_base_open.c | 42 - orte/mca/rds/base/rds_base_registry_fns.c | 3 + orte/mca/rds/hostfile/rds_hostfile.c | 115 +- orte/mca/rds/rds.h | 17 - orte/mca/rds/rds_types.h | 29 - orte/mca/rds/resfile/Makefile.am | 52 - orte/mca/rds/resfile/configure.params | 24 - orte/mca/rds/resfile/rds_resfile.c | 357 ------ orte/mca/rds/resfile/rds_resfile.h | 80 -- orte/mca/rds/resfile/rds_resfile_component.c | 131 --- .../resfile/rds_resfile_parse_attributes.c | 341 ------ .../rmaps_data_type_copy_fns.c | 1 - .../rmaps_data_type_packing_fns.c | 8 +- .../rmaps_data_type_print_fns.c | 4 +- .../rmaps_data_type_unpacking_fns.c | 8 - orte/mca/rmaps/base/rmaps_base_registry_fns.c | 48 +- orte/mca/rmaps/base/rmaps_base_support_fns.c | 16 +- orte/mca/rmaps/base/rmaps_class_instances.h | 2 
- orte/mca/rmaps/base/rmaps_private.h | 4 +- orte/mca/rmaps/rmaps.h | 2 +- orte/mca/rmaps/rmaps_types.h | 1 - orte/mca/rmgr/base/rmgr_base_xconnect.c | 1 - orte/mca/rmgr/proxy/rmgr_proxy.c | 12 +- orte/mca/rmgr/rmgr.h | 6 +- orte/mca/rmgr/urm/rmgr_urm.c | 35 +- orte/mca/rml/base/rml_base_contact.c | 4 +- .../mca/routed/unity/routed_unity_component.c | 12 +- orte/mca/schema/base/base.h | 3 +- orte/mca/schema/base/schema_base_fns.c | 74 +- orte/mca/schema/base/schema_base_open.c | 1 - orte/mca/schema/schema.h | 7 - orte/mca/schema/schema_types.h | 1 - orte/mca/sds/base/base.h | 6 +- orte/mca/sds/base/sds_base_put.c | 43 +- orte/mca/sds/bproc/sds_bproc_module.c | 14 - orte/mca/sds/cnos/sds_cnos_module.c | 5 +- orte/mca/sds/env/sds_env_module.c | 14 - .../portals_utcp/sds_portals_utcp_module.c | 5 +- orte/mca/sds/slurm/sds_slurm_module.c | 16 +- orte/mca/sds/xcpu/sds_xcpu_module.c | 18 +- orte/mca/smr/base/smr_base_get_node_state.c | 3 +- orte/mca/smr/base/smr_base_open.c | 1 - orte/mca/smr/base/smr_base_set_node_state.c | 5 +- orte/mca/smr/base/smr_private.h | 5 +- orte/mca/smr/bproc/smr_bproc.c | 3 +- orte/mca/smr/smr.h | 4 +- orte/mca/snapc/base/snapc_base_fns.c | 3 - orte/mca/snapc/full/snapc_full_app.c | 3 +- orte/mca/snapc/full/snapc_full_global.c | 9 +- orte/mca/snapc/full/snapc_full_local.c | 1 - orte/orted/orted_main.c | 53 +- orte/runtime/Makefile.am | 2 - orte/runtime/orte_init_stage1.c | 40 +- orte/runtime/orte_monitor.c | 4 +- orte/runtime/orte_restart.c | 4 +- orte/runtime/orte_setup_hnp.c | 579 ---------- orte/runtime/orte_setup_hnp.h | 44 - orte/test/system/orte_abort.c | 2 +- orte/test/system/orte_nodename.c | 4 +- orte/test/system/orte_ring.c | 2 - orte/test/system/orte_spawn.c | 3 +- orte/test/system/orte_stage_gate.c | 4 +- orte/test/system/spawn_child.c | 2 +- orte/test/system/spin.c | 2 +- orte/test/unit/dss/dss_cmp.c | 1 - orte/test/unit/dss/dss_copy.c | 1 - orte/test/unit/dss/dss_inc_dec.c | 1 - orte/test/unit/dss/dss_print.c | 1 - 
orte/test/unit/dss/dss_release.c | 1 - orte/test/unit/dss/dss_set_get.c | 1 - orte/test/unit/dss/dss_size.c | 1 - orte/test/unit/gpr/gpr_dt_copy.c | 1 - orte/test/unit/gpr/gpr_dt_print.c | 1 - orte/test/unit/gpr/gpr_dt_release.c | 1 - orte/test/unit/gpr/gpr_dt_size.c | 1 - orte/test/unit/ns/ns_peers.c | 8 +- orte/test/unit/ns/ns_string_fns.c | 33 +- orte/tools/Makefile.am | 2 - orte/tools/console/Makefile.am | 45 - orte/tools/console/help-orteconsole.txt | 56 - orte/tools/console/orteconsole.c | 1006 ----------------- orte/tools/console/orteconsole.h | 119 -- orte/tools/orte-ps/orte-ps.c | 11 +- orte/tools/orte-restart/orte-restart.c | 3 +- 191 files changed, 911 insertions(+), 6364 deletions(-) rename orte/mca/ns/base/{ns_base_cell_fns.c => ns_base_node_fns.c} (50%) create mode 100644 orte/mca/ns/base/ns_base_print_name_args.c delete mode 100644 orte/mca/rds/resfile/Makefile.am delete mode 100644 orte/mca/rds/resfile/configure.params delete mode 100644 orte/mca/rds/resfile/rds_resfile.c delete mode 100644 orte/mca/rds/resfile/rds_resfile.h delete mode 100644 orte/mca/rds/resfile/rds_resfile_component.c delete mode 100644 orte/mca/rds/resfile/rds_resfile_parse_attributes.c delete mode 100644 orte/runtime/orte_setup_hnp.c delete mode 100644 orte/runtime/orte_setup_hnp.h delete mode 100644 orte/tools/console/Makefile.am delete mode 100644 orte/tools/console/help-orteconsole.txt delete mode 100644 orte/tools/console/orteconsole.c delete mode 100644 orte/tools/console/orteconsole.h diff --git a/configure.ac b/configure.ac index 5b8859c7a1..a5359e06c7 100644 --- a/configure.ac +++ b/configure.ac @@ -1190,7 +1190,6 @@ AC_CONFIG_FILES([ orte/include/Makefile orte/etc/Makefile - orte/tools/console/Makefile orte/tools/orteboot/Makefile orte/tools/orted/Makefile orte/tools/ortehalt/Makefile diff --git a/ompi/communicator/comm.c b/ompi/communicator/comm.c index f223023ad1..fe6e021e29 100644 --- a/ompi/communicator/comm.c +++ b/ompi/communicator/comm.c @@ -1068,7 +1068,7 @@ 
int ompi_comm_determine_first ( ompi_communicator_t *intercomm, int high ) ourproc = intercomm->c_local_group->grp_proc_pointers[0]; theirproc = intercomm->c_remote_group->grp_proc_pointers[0]; - mask = ORTE_NS_CMP_CELLID | ORTE_NS_CMP_JOBID | ORTE_NS_CMP_VPID; + mask = ORTE_NS_CMP_JOBID | ORTE_NS_CMP_VPID; rc = orte_ns.compare_fields(mask, &(ourproc->proc_name), &(theirproc->proc_name)); if ( 0 > rc ) { flag = true; diff --git a/ompi/mca/btl/base/btl_base_error.c b/ompi/mca/btl/base/btl_base_error.c index 0d36847af6..ed78ecc596 100644 --- a/ompi/mca/btl/base/btl_base_error.c +++ b/ompi/mca/btl/base/btl_base_error.c @@ -26,6 +26,7 @@ #include "btl_base_error.h" #include "opal/util/show_help.h" #include "orte/util/sys_info.h" +#include "orte/mca/ns/ns_types.h" int mca_btl_base_debug; @@ -59,8 +60,7 @@ void mca_btl_base_error_no_nics(const char* transport, char *procid; if (mca_btl_base_warn_component_unused) { /* print out no-nic warning if user told us to */ - asprintf(&procid, "[%lu,%lu,%lu]", - ORTE_NAME_ARGS(orte_process_info.my_name)); + asprintf(&procid, "%s", ORTE_NAME_PRINT(orte_process_info.my_name)); opal_show_help("help-mpi-btl-base.txt", "btl:no-nics", true, procid, transport, orte_system_info.nodename, diff --git a/ompi/mca/btl/base/btl_base_error.h b/ompi/mca/btl/base/btl_base_error.h index 8cbc64c607..1d4bd55487 100644 --- a/ompi/mca/btl/base/btl_base_error.h +++ b/ompi/mca/btl/base/btl_base_error.h @@ -27,6 +27,7 @@ #include "orte/util/proc_info.h" #include "orte/util/sys_info.h" +#include "orte/mca/ns/ns_types.h" OMPI_DECLSPEC extern int mca_btl_base_debug; @@ -35,9 +36,9 @@ extern int mca_btl_base_out(const char*, ...); #define BTL_OUTPUT(args) \ do { \ - mca_btl_base_out("[%s][%ld,%ld,%ld][%s:%d:%s] ", \ + mca_btl_base_out("[%s]%s[%s:%d:%s] ", \ orte_system_info.nodename, \ - ORTE_NAME_ARGS(orte_process_info.my_name), \ + ORTE_NAME_PRINT(orte_process_info.my_name), \ __FILE__, __LINE__, __func__); \ mca_btl_base_out args; \ mca_btl_base_out("\n"); 
\ @@ -46,9 +47,9 @@ do { \ #define BTL_ERROR(args) \ do { \ - mca_btl_base_err("[%s][%ld,%ld,%ld][%s:%d:%s] ", \ + mca_btl_base_err("[%s]%s[%s:%d:%s] ", \ orte_system_info.nodename, \ - ORTE_NAME_ARGS(orte_process_info.my_name), \ + ORTE_NAME_PRINT(orte_process_info.my_name), \ __FILE__, __LINE__, __func__); \ mca_btl_base_err args; \ mca_btl_base_err("\n"); \ @@ -56,8 +57,8 @@ do { \ #define BTL_PEER_ERROR(proc, args) \ do { \ - mca_btl_base_err("[%ld,%ld,%ld][%s:%d:%s] from %s ", \ - ORTE_NAME_ARGS(orte_process_info.my_name), \ + mca_btl_base_err("%s[%s:%d:%s] from %s ", \ + ORTE_NAME_PRINT(orte_process_info.my_name), \ __FILE__, __LINE__, __func__, \ orte_system_info.nodename); \ if(proc && proc->proc_hostname) { \ @@ -72,9 +73,9 @@ do { \ #define BTL_DEBUG(args) \ do { \ if(mca_btl_base_debug) { \ - mca_btl_base_err("[%s][%ld,%ld,%ld][%s:%d:%s] ", \ + mca_btl_base_err("[%s]%s[%s:%d:%s] ", \ orte_system_info.nodename, \ - ORTE_NAME_ARGS(orte_process_info.my_name), \ + ORTE_NAME_PRINT(orte_process_info.my_name), \ __FILE__, __LINE__, __func__); \ mca_btl_base_err args; \ mca_btl_base_err("\n"); \ @@ -83,9 +84,9 @@ do { \ #define BTL_VERBOSE(args) \ do { \ if(mca_btl_base_debug > 1) { \ - mca_btl_base_err("[%s][%ld,%ld,%ld][%s:%d:%s] ", \ + mca_btl_base_err("[%s]%s[%s:%d:%s] ", \ orte_system_info.nodename, \ - ORTE_NAME_ARGS(orte_process_info.my_name), \ + ORTE_NAME_PRINT(orte_process_info.my_name), \ __FILE__, __LINE__, __func__); \ mca_btl_base_err args; \ mca_btl_base_err("\n"); \ diff --git a/ompi/mca/btl/gm/btl_gm_component.c b/ompi/mca/btl/gm/btl_gm_component.c index c8fa3d5d5f..a0c2e66ad4 100644 --- a/ompi/mca/btl/gm/btl_gm_component.c +++ b/ompi/mca/btl/gm/btl_gm_component.c @@ -429,10 +429,10 @@ static int mca_btl_gm_discover( void ) if(mca_btl_gm_component.gm_debug > 0) { opal_output(0, - "[%ld,%ld,%ld] gm_port %08lX, " + "%s gm_port %08lX, " "board %" PRIu32 ", global %" PRIu32 " " "node %" PRIu32 "port %" PRIu32 "\n", - 
ORTE_NAME_ARGS(orte_process_info.my_name), + ORTE_NAME_PRINT(orte_process_info.my_name), (unsigned long) port, board_no, global_id, node_id, port_no); } diff --git a/ompi/mca/btl/gm/btl_gm_proc.c b/ompi/mca/btl/gm/btl_gm_proc.c index 2dfcf2c98e..cb2b11d3cf 100644 --- a/ompi/mca/btl/gm/btl_gm_proc.c +++ b/ompi/mca/btl/gm/btl_gm_proc.c @@ -128,15 +128,15 @@ mca_btl_gm_proc_t* mca_btl_gm_proc_create(ompi_proc_t* ompi_proc) (void*)&gm_proc->proc_addrs, &size); if(OMPI_SUCCESS != rc) { - opal_output(0, "[%s:%d] ompi_modex_recv failed for peer [%ld,%ld,%ld]", - __FILE__,__LINE__,ORTE_NAME_ARGS(&ompi_proc->proc_name)); + opal_output(0, "[%s:%d] ompi_modex_recv failed for peer %s", + __FILE__,__LINE__,ORTE_NAME_PRINT(&ompi_proc->proc_name)); OBJ_RELEASE(gm_proc); return NULL; } if((size % sizeof(mca_btl_gm_addr_t)) != 0) { - opal_output(0, "[%s:%d] invalid gm address for peer [%ld,%ld,%ld]", - __FILE__,__LINE__,ORTE_NAME_ARGS(&ompi_proc->proc_name)); + opal_output(0, "[%s:%d] invalid gm address for peer %s", + __FILE__,__LINE__,ORTE_NAME_PRINT(&ompi_proc->proc_name)); OBJ_RELEASE(gm_proc); return NULL; } @@ -189,9 +189,9 @@ int mca_btl_gm_proc_insert( return OMPI_ERROR; } if(mca_btl_gm_component.gm_debug > 0) { - opal_output(0, "[%ld,%ld,%ld] mapped global id %" PRIu32 + opal_output(0, "%s mapped global id %" PRIu32 " to node id %" PRIu32 "\n", - ORTE_NAME_ARGS(orte_process_info.my_name), + ORTE_NAME_PRINT(orte_process_info.my_name), gm_endpoint->endpoint_addr.global_id, gm_endpoint->endpoint_addr.node_id); } diff --git a/ompi/mca/btl/mvapi/btl_mvapi.c b/ompi/mca/btl/mvapi/btl_mvapi.c index 279b8c05e2..612f94b55b 100644 --- a/ompi/mca/btl/mvapi/btl_mvapi.c +++ b/ompi/mca/btl/mvapi/btl_mvapi.c @@ -798,8 +798,8 @@ void mca_btl_mvapi_dump( opal_output( 0, "No endpoint for this peer\n" ); return; } - opal_output( 0, "endpoint with processor (%lu.%lu.%lu)\n", - ORTE_NAME_ARGS( &(endpoint->endpoint_proc->proc_ompi->proc_name) ) ); + opal_output( 0, "endpoint with processor 
%s\n", + ORTE_NAME_PRINT( &(endpoint->endpoint_proc->proc_ompi->proc_name) ) ); opal_output( 0, "endpoint state: %s\n", (endpoint->endpoint_state == MCA_BTL_IB_CONNECTING ? "connecting" : (endpoint->endpoint_state == MCA_BTL_IB_CONNECT_ACK ? "waiting ack" : diff --git a/ompi/mca/btl/mvapi/btl_mvapi_proc.c b/ompi/mca/btl/mvapi/btl_mvapi_proc.c index 27f1d610e5..8047c8256c 100644 --- a/ompi/mca/btl/mvapi/btl_mvapi_proc.c +++ b/ompi/mca/btl/mvapi/btl_mvapi_proc.c @@ -140,15 +140,15 @@ mca_btl_mvapi_proc_t* mca_btl_mvapi_proc_create(ompi_proc_t* ompi_proc) if(OMPI_SUCCESS != rc) { - opal_output(0, "[%s:%d] ompi_modex_recv failed for peer [%ld,%ld,%ld]", - __FILE__,__LINE__,ORTE_NAME_ARGS(&ompi_proc->proc_name)); + opal_output(0, "[%s:%d] ompi_modex_recv failed for peer %s", + __FILE__,__LINE__,ORTE_NAME_PRINT(&ompi_proc->proc_name)); OBJ_RELEASE(mvapi_proc); return NULL; } if((size % sizeof(mca_btl_mvapi_port_info_t)) != 0) { - opal_output(0, "[%s:%d] invalid mvapi address for peer [%ld,%ld,%ld]", - __FILE__,__LINE__,ORTE_NAME_ARGS(&ompi_proc->proc_name)); + opal_output(0, "[%s:%d] invalid mvapi address for peer %s", + __FILE__,__LINE__,ORTE_NAME_PRINT(&ompi_proc->proc_name)); OBJ_RELEASE(mvapi_proc); return NULL; } diff --git a/ompi/mca/btl/mx/btl_mx_proc.c b/ompi/mca/btl/mx/btl_mx_proc.c index 5cfd6d03e7..cef9cc18c1 100644 --- a/ompi/mca/btl/mx/btl_mx_proc.c +++ b/ompi/mca/btl/mx/btl_mx_proc.c @@ -121,8 +121,8 @@ mca_btl_mx_proc_t* mca_btl_mx_proc_create(ompi_proc_t* ompi_proc) rc = ompi_modex_recv( &mca_btl_mx_component.super.btl_version, ompi_proc, (void*)&mx_peers, &size ); if( OMPI_SUCCESS != rc ) { - opal_output( 0, "mca_pml_base_modex_recv failed for peer [%ld,%ld,%ld]", - ORTE_NAME_ARGS(&ompi_proc->proc_name) ); + opal_output( 0, "mca_pml_base_modex_recv failed for peer %s", + ORTE_NAME_PRINT(&ompi_proc->proc_name) ); return NULL; } @@ -130,8 +130,8 @@ mca_btl_mx_proc_t* mca_btl_mx_proc_create(ompi_proc_t* ompi_proc) return NULL; } if( (size % 
sizeof(mca_btl_mx_addr_t)) != 0 ) { - opal_output( 0, "invalid mx address for peer [%ld,%ld,%ld]", - ORTE_NAME_ARGS(&ompi_proc->proc_name) ); + opal_output( 0, "invalid mx address for peer %s", + ORTE_NAME_PRINT(&ompi_proc->proc_name) ); return NULL; } diff --git a/ompi/mca/btl/ofud/btl_ofud_proc.c b/ompi/mca/btl/ofud/btl_ofud_proc.c index 36fe2799c4..ddd3a96bf6 100644 --- a/ompi/mca/btl/ofud/btl_ofud_proc.c +++ b/ompi/mca/btl/ofud/btl_ofud_proc.c @@ -127,15 +127,15 @@ mca_btl_ud_proc_t* mca_btl_ud_proc_create(ompi_proc_t* ompi_proc) if(OMPI_SUCCESS != rc) { opal_output(0, - "[%s:%d] ompi_modex_recv failed for peer [%ld,%ld,%ld]", - __FILE__,__LINE__,ORTE_NAME_ARGS(&ompi_proc->proc_name)); + "[%s:%d] ompi_modex_recv failed for peer %s", + __FILE__,__LINE__,ORTE_NAME_PRINT(&ompi_proc->proc_name)); OBJ_RELEASE(module_proc); return NULL; } if((size % sizeof(mca_btl_ud_addr_t)) != 0) { - opal_output(0, "[%s:%d] invalid module address for peer [%ld,%ld,%ld]", - __FILE__,__LINE__,ORTE_NAME_ARGS(&ompi_proc->proc_name)); + opal_output(0, "[%s:%d] invalid module address for peer %s", + __FILE__,__LINE__,ORTE_NAME_PRINT(&ompi_proc->proc_name)); OBJ_RELEASE(module_proc); return NULL; } diff --git a/ompi/mca/btl/openib/btl_openib_proc.c b/ompi/mca/btl/openib/btl_openib_proc.c index 1beb83e6dc..6ecb636ae4 100644 --- a/ompi/mca/btl/openib/btl_openib_proc.c +++ b/ompi/mca/btl/openib/btl_openib_proc.c @@ -137,15 +137,15 @@ mca_btl_openib_proc_t* mca_btl_openib_proc_create(ompi_proc_t* ompi_proc) if(OMPI_SUCCESS != rc) { - opal_output(mca_btl_base_output, "[%s:%d] ompi_modex_recv failed for peer [%ld,%ld,%ld]", - __FILE__,__LINE__,ORTE_NAME_ARGS(&ompi_proc->proc_name)); + opal_output(mca_btl_base_output, "[%s:%d] ompi_modex_recv failed for peer %s", + __FILE__,__LINE__,ORTE_NAME_PRINT(&ompi_proc->proc_name)); OBJ_RELEASE(module_proc); return NULL; } if((size % sizeof(mca_btl_openib_port_info_t)) != 0) { - opal_output(mca_btl_base_output, "[%s:%d] invalid module address for peer 
[%ld,%ld,%ld]", - __FILE__,__LINE__,ORTE_NAME_ARGS(&ompi_proc->proc_name)); + opal_output(mca_btl_base_output, "[%s:%d] invalid module address for peer %s", + __FILE__,__LINE__,ORTE_NAME_PRINT(&ompi_proc->proc_name)); OBJ_RELEASE(module_proc); return NULL; } diff --git a/ompi/mca/btl/tcp/btl_tcp_endpoint.c b/ompi/mca/btl/tcp/btl_tcp_endpoint.c index 511c5ca688..32dc410e03 100644 --- a/ompi/mca/btl/tcp/btl_tcp_endpoint.c +++ b/ompi/mca/btl/tcp/btl_tcp_endpoint.c @@ -470,8 +470,8 @@ static int mca_btl_tcp_endpoint_recv_connect_ack(mca_btl_base_endpoint_t* btl_en ORTE_PROCESS_NAME_NTOH(guid); /* compare this to the expected values */ if (0 != orte_ns.compare_fields(ORTE_NS_CMP_ALL, &btl_proc->proc_name, &guid)) { - BTL_ERROR(("received unexpected process identifier [%lu,%lu,%lu]", - ORTE_NAME_ARGS(&guid))); + BTL_ERROR(("received unexpected process identifier %s", + ORTE_NAME_PRINT(&guid))); mca_btl_tcp_endpoint_close(btl_endpoint); return OMPI_ERR_UNREACH; } diff --git a/ompi/mca/btl/udapl/btl_udapl_proc.c b/ompi/mca/btl/udapl/btl_udapl_proc.c index 3718e1d5d5..6f60d0bdaf 100644 --- a/ompi/mca/btl/udapl/btl_udapl_proc.c +++ b/ompi/mca/btl/udapl/btl_udapl_proc.c @@ -129,15 +129,15 @@ mca_btl_udapl_proc_t* mca_btl_udapl_proc_create(ompi_proc_t* ompi_proc) (void*)&udapl_proc->proc_addrs, &size); if(OMPI_SUCCESS != rc) { - opal_output(0, "[%s:%d] ompi_modex_recv failed for peer [%ld,%ld,%ld]", - __FILE__,__LINE__,ORTE_NAME_ARGS(&ompi_proc->proc_name)); + opal_output(0, "[%s:%d] ompi_modex_recv failed for peer %s", + __FILE__,__LINE__,ORTE_NAME_PRINT(&ompi_proc->proc_name)); OBJ_RELEASE(udapl_proc); return NULL; } if((size % sizeof(mca_btl_udapl_addr_t)) != 0) { - opal_output(0, "[%s:%d] invalid udapl address for peer [%lu,%lu,%lu]", - __FILE__,__LINE__,ORTE_NAME_ARGS(&ompi_proc->proc_name)); + opal_output(0, "[%s:%d] invalid udapl address for peer %s", + __FILE__,__LINE__,ORTE_NAME_PRINT(&ompi_proc->proc_name)); OBJ_RELEASE(udapl_proc); return NULL; } diff --git 
a/ompi/mca/crcp/coord/crcp_coord_pml.c b/ompi/mca/crcp/coord/crcp_coord_pml.c index b9a237b4a2..72049407d2 100644 --- a/ompi/mca/crcp/coord/crcp_coord_pml.c +++ b/ompi/mca/crcp/coord/crcp_coord_pml.c @@ -435,7 +435,6 @@ void ompi_crcp_coord_pml_message_ref_construct(ompi_crcp_coord_pml_message_ref_t msg_ref->comm = NULL; msg_ref->request = NULL; - msg_ref->proc_name.cellid = ORTE_CELLID_INVALID; msg_ref->proc_name.jobid = ORTE_JOBID_INVALID; msg_ref->proc_name.vpid = ORTE_VPID_INVALID; @@ -471,7 +470,6 @@ void ompi_crcp_coord_pml_message_ref_destruct( ompi_crcp_coord_pml_message_ref_t msg_ref->request = NULL; } - msg_ref->proc_name.cellid = ORTE_CELLID_INVALID; msg_ref->proc_name.jobid = ORTE_JOBID_INVALID; msg_ref->proc_name.vpid = ORTE_VPID_INVALID; @@ -489,7 +487,6 @@ OBJ_CLASS_INSTANCE(ompi_crcp_coord_pml_peer_ref_t, ompi_crcp_coord_pml_peer_ref_destruct); void ompi_crcp_coord_pml_peer_ref_construct(ompi_crcp_coord_pml_peer_ref_t *peer_ref) { - peer_ref->proc_name.cellid = ORTE_CELLID_INVALID; peer_ref->proc_name.jobid = ORTE_JOBID_INVALID; peer_ref->proc_name.vpid = ORTE_VPID_INVALID; @@ -521,7 +518,6 @@ void ompi_crcp_coord_pml_peer_ref_construct(ompi_crcp_coord_pml_peer_ref_t *peer void ompi_crcp_coord_pml_peer_ref_destruct( ompi_crcp_coord_pml_peer_ref_t *peer_ref) { opal_list_item_t* item = NULL; - peer_ref->proc_name.cellid = ORTE_CELLID_INVALID; peer_ref->proc_name.jobid = ORTE_JOBID_INVALID; peer_ref->proc_name.vpid = ORTE_VPID_INVALID; @@ -593,7 +589,6 @@ OBJ_CLASS_INSTANCE(drain_msg_ack_ref_t, void drain_msg_ack_ref_construct(drain_msg_ack_ref_t *msg_ack_ref) { msg_ack_ref->complete = false; - msg_ack_ref->peer.cellid = ORTE_CELLID_INVALID; msg_ack_ref->peer.jobid = ORTE_JOBID_INVALID; msg_ack_ref->peer.vpid = ORTE_VPID_INVALID; } @@ -601,7 +596,6 @@ void drain_msg_ack_ref_construct(drain_msg_ack_ref_t *msg_ack_ref) { void drain_msg_ack_ref_destruct( drain_msg_ack_ref_t *msg_ack_ref) { msg_ack_ref->complete = false; - msg_ack_ref->peer.cellid = 
ORTE_CELLID_INVALID; msg_ack_ref->peer.jobid = ORTE_JOBID_INVALID; msg_ack_ref->peer.vpid = ORTE_VPID_INVALID; } @@ -650,7 +644,7 @@ OBJ_CLASS_INSTANCE(ompi_crcp_coord_pml_state_t, v_msg_ref = v_coord_state->msg_ref; \ } -#define CREATE_NEW_MSG(msg_ref, v_type, v_buffer, v_count, v_datatype, v_tag, v_rank, v_comm, v_request, p_cellid, p_jobid, p_vpid) \ +#define CREATE_NEW_MSG(msg_ref, v_type, v_buffer, v_count, v_datatype, v_tag, v_rank, v_comm, v_request, p_jobid, p_vpid) \ { \ msg_ref = OBJ_NEW(ompi_crcp_coord_pml_message_ref_t); \ msg_ref->msg_id = message_seq_num; \ @@ -677,7 +671,6 @@ OBJ_CLASS_INSTANCE(ompi_crcp_coord_pml_state_t, OBJ_RETAIN(msg_ref->request); \ } \ \ - msg_ref->proc_name.cellid = p_cellid; \ msg_ref->proc_name.jobid = p_jobid; \ msg_ref->proc_name.vpid = p_vpid; \ } @@ -708,7 +701,6 @@ OBJ_CLASS_INSTANCE(ompi_crcp_coord_pml_state_t, OBJ_RETAIN(msg_ref->request); \ } \ \ - dup_msg_ref->proc_name.cellid = msg_ref->proc_name.cellid; \ dup_msg_ref->proc_name.jobid = msg_ref->proc_name.jobid; \ dup_msg_ref->proc_name.vpid = msg_ref->proc_name.vpid; \ } @@ -884,7 +876,6 @@ ompi_crcp_base_pml_state_t* ompi_crcp_coord_pml_add_procs( for( i = 0; i < nprocs; ++i) { new_peer_ref = OBJ_NEW(ompi_crcp_coord_pml_peer_ref_t); - new_peer_ref->proc_name.cellid = procs[i]->proc_name.cellid; new_peer_ref->proc_name.jobid = procs[i]->proc_name.jobid; new_peer_ref->proc_name.vpid = procs[i]->proc_name.vpid; @@ -917,8 +908,8 @@ ompi_crcp_base_pml_state_t* ompi_crcp_coord_pml_del_procs( item = (opal_list_item_t*)find_peer(procs[i]->proc_name); if(NULL == item) { opal_output(mca_crcp_coord_component.super.output_handle, - "crcp:coord: del_procs: Unable to find peer [%lu,%lu,%lu]\n", - ORTE_NAME_ARGS(&(procs[i]->proc_name))); + "crcp:coord: del_procs: Unable to find peer %s\n", + ORTE_NAME_PRINT(&(procs[i]->proc_name))); exit_status = OMPI_ERROR; goto DONE; } @@ -981,7 +972,6 @@ ompi_crcp_base_pml_state_t* ompi_crcp_coord_pml_isend_init( buf, count, datatype, tag, 
dst, comm, NULL, - peer_ref->proc_name.cellid, peer_ref->proc_name.jobid, peer_ref->proc_name.vpid); @@ -1075,7 +1065,6 @@ ompi_crcp_base_pml_state_t* ompi_crcp_coord_pml_isend( CREATE_NEW_MSG(msg_ref, COORD_MSG_TYPE_I_SEND, buf, count, datatype, tag, dst, comm, NULL, - peer_ref->proc_name.cellid, peer_ref->proc_name.jobid, peer_ref->proc_name.vpid); @@ -1171,7 +1160,6 @@ ompi_crcp_base_pml_state_t* ompi_crcp_coord_pml_send( CREATE_NEW_MSG(msg_ref, COORD_MSG_TYPE_B_SEND, buf, count, datatype, tag, dst, comm, NULL, - peer_ref->proc_name.cellid, peer_ref->proc_name.jobid, peer_ref->proc_name.vpid); @@ -1264,7 +1252,6 @@ ompi_crcp_base_pml_state_t* ompi_crcp_coord_pml_irecv_init( buf, count, datatype, tag, src, comm, NULL, /* Leave this NULL for now, will pick up real value in POST */ - ORTE_CELLID_INVALID, ORTE_JOBID_INVALID, ORTE_VPID_INVALID); @@ -1296,7 +1283,6 @@ ompi_crcp_base_pml_state_t* ompi_crcp_coord_pml_irecv_init( goto DONE; } - msg_ref->proc_name.cellid = peer_ref->proc_name.cellid; msg_ref->proc_name.jobid = peer_ref->proc_name.jobid; msg_ref->proc_name.vpid = peer_ref->proc_name.vpid; @@ -1452,7 +1438,6 @@ ompi_crcp_base_pml_state_t* ompi_crcp_coord_pml_irecv( buf, count, datatype, tag, src, comm, NULL, /* Leave this NULL for now, will pick up real value in POST */ - ORTE_CELLID_INVALID, ORTE_JOBID_INVALID, ORTE_VPID_INVALID); @@ -1484,7 +1469,6 @@ ompi_crcp_base_pml_state_t* ompi_crcp_coord_pml_irecv( goto DONE; } - msg_ref->proc_name.cellid = peer_ref->proc_name.cellid; msg_ref->proc_name.jobid = peer_ref->proc_name.jobid; msg_ref->proc_name.vpid = peer_ref->proc_name.vpid; @@ -1656,7 +1640,6 @@ ompi_crcp_base_pml_state_t* ompi_crcp_coord_pml_recv( buf, count, datatype, tag, src, comm, request, - ORTE_CELLID_INVALID, ORTE_JOBID_INVALID, ORTE_VPID_INVALID); @@ -1688,7 +1671,6 @@ ompi_crcp_base_pml_state_t* ompi_crcp_coord_pml_recv( goto DONE; } - msg_ref->proc_name.cellid = peer_ref->proc_name.cellid; msg_ref->proc_name.jobid = 
peer_ref->proc_name.jobid; msg_ref->proc_name.vpid = peer_ref->proc_name.vpid; @@ -1722,7 +1704,6 @@ ompi_crcp_base_pml_state_t* ompi_crcp_coord_pml_recv( goto DONE; } - msg_ref->proc_name.cellid = peer_ref->proc_name.cellid; msg_ref->proc_name.jobid = peer_ref->proc_name.jobid; msg_ref->proc_name.vpid = peer_ref->proc_name.vpid; @@ -1859,7 +1840,6 @@ ompi_crcp_base_pml_state_t* ompi_crcp_coord_pml_recv( CREATE_NEW_MSG(msg_ref, COORD_MSG_TYPE_B_RECV, buf, count, datatype, tag, src, comm, NULL, - ORTE_CELLID_INVALID, ORTE_JOBID_INVALID, ORTE_VPID_INVALID); @@ -1895,7 +1875,6 @@ ompi_crcp_base_pml_state_t* ompi_crcp_coord_pml_recv( goto DONE; } - msg_ref->proc_name.cellid = peer_ref->proc_name.cellid; msg_ref->proc_name.jobid = peer_ref->proc_name.jobid; msg_ref->proc_name.vpid = peer_ref->proc_name.vpid; @@ -1942,7 +1921,6 @@ ompi_crcp_base_pml_state_t* ompi_crcp_coord_pml_recv( goto DONE; } - msg_ref->proc_name.cellid = peer_ref->proc_name.cellid; msg_ref->proc_name.jobid = peer_ref->proc_name.jobid; msg_ref->proc_name.vpid = peer_ref->proc_name.vpid; @@ -3020,8 +2998,8 @@ static int ft_event_coordinate_peers(void) */ if( stall_for_completion ) { opal_output_verbose(15, mca_crcp_coord_component.super.output_handle, - "crcp:coord: ft_event_coordinate_peers: [%lu,%lu,%lu] **** STALLING ***", - ORTE_NAME_ARGS(orte_process_info.my_name)); + "crcp:coord: ft_event_coordinate_peers: %s **** STALLING ***", + ORTE_NAME_PRINT(orte_process_info.my_name)); step_to_return_to = 1; exit_status = OMPI_SUCCESS; goto DONE; @@ -3058,8 +3036,8 @@ static int ft_event_coordinate_peers(void) } opal_output_verbose(5, mca_crcp_coord_component.super.output_handle, - "crcp:coord: ft_event_coordinate_peers: [%lu,%lu,%lu] Coordination Finished...\n", - ORTE_NAME_ARGS(orte_process_info.my_name) ); + "crcp:coord: ft_event_coordinate_peers: %s Coordination Finished...\n", + ORTE_NAME_PRINT(orte_process_info.my_name) ); /* * Now that all our peer channels are marked as drained @@ -3176,11 +3154,11 
@@ static int ft_event_check_bookmarks(void) if( 10 <= mca_crcp_coord_component.super.verbose ) { sleep(orte_process_info.my_name->vpid); opal_output_verbose(10, mca_crcp_coord_component.super.output_handle, - "Process [%lu,%lu,%lu] Match Table", - ORTE_NAME_ARGS(orte_process_info.my_name)); + "Process %s Match Table", + ORTE_NAME_PRINT(orte_process_info.my_name)); opal_output_verbose(10, mca_crcp_coord_component.super.output_handle, - "[%lu,%lu,%lu] %5s | %7s | %7s | %7s | %7s |", - ORTE_NAME_ARGS(orte_process_info.my_name), + "%s %5s | %7s | %7s | %7s | %7s |", + ORTE_NAME_PRINT(orte_process_info.my_name), "Vpid", "T_Send", "M_Recv", "M_Send", "T_Recv"); for(item = opal_list_get_first(&ompi_crcp_coord_pml_peer_refs); @@ -3205,8 +3183,8 @@ static int ft_event_check_bookmarks(void) peer_ref->matched_recv_init_msgs ); opal_output_verbose(10, mca_crcp_coord_component.super.output_handle, - "[%lu,%lu,%lu] %5d | %7d | %7d | %7d | %7d |", - ORTE_NAME_ARGS(orte_process_info.my_name), + "%s %5d | %7d | %7d | %7d | %7d |", + ORTE_NAME_PRINT(orte_process_info.my_name), peer_ref->proc_name.vpid, t_send, m_recv, m_send, t_recv); } @@ -3245,11 +3223,11 @@ static int ft_event_check_bookmarks(void) /* T_Send >= M_Recv */ if( p_n_to_p_m < p_n_from_p_m ) { opal_output(mca_crcp_coord_component.super.output_handle, - "crcp:coord: check_bookmarks: [%lu,%lu,%lu] --> [%lu,%lu,%lu] " + "crcp:coord: check_bookmarks: %s --> %s " "Sent Msgs (%4d) = Received Msgs (%4d) => Diff (%4d). " " WARNING: Peer received more than was sent. :(\n", - ORTE_NAME_ARGS(orte_process_info.my_name), - ORTE_NAME_ARGS(&(peer_ref->proc_name)), + ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(&(peer_ref->proc_name)), p_n_to_p_m, p_n_from_p_m, (p_n_to_p_m - p_n_from_p_m) @@ -3260,10 +3238,10 @@ static int ft_event_check_bookmarks(void) * so need to coordinate with peer. 
*/ if( p_n_to_p_m > p_n_from_p_m) { opal_output_verbose(10, mca_crcp_coord_component.super.output_handle, - "crcp:coord: check_bookmarks: [%lu,%lu,%lu] --> [%lu,%lu,%lu] " + "crcp:coord: check_bookmarks: %s --> %s " "Sent Msgs (%4d) = Received Msgs (%4d). Peer needs %4d.\n", - ORTE_NAME_ARGS(orte_process_info.my_name), - ORTE_NAME_ARGS(&(peer_ref->proc_name)), + ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(&(peer_ref->proc_name)), p_n_to_p_m, p_n_from_p_m, (p_n_to_p_m - p_n_from_p_m) @@ -3275,8 +3253,8 @@ static int ft_event_check_bookmarks(void) */ if( OMPI_SUCCESS != (ret = send_msg_details(peer_ref, p_n_to_p_m, p_n_from_p_m) ) ) { opal_output(mca_crcp_coord_component.super.output_handle, - "crcp:coord: check_bookmarks: Unable to send message details to peer [%lu,%lu,%lu]: Return %d\n", - ORTE_NAME_ARGS(&peer_ref->proc_name), + "crcp:coord: check_bookmarks: Unable to send message details to peer %s: Return %d\n", + ORTE_NAME_PRINT(&peer_ref->proc_name), ret); return ret; } @@ -3296,11 +3274,11 @@ static int ft_event_check_bookmarks(void) /* M_Send >= T_Recv */ if( p_n_to_p_m < p_n_from_p_m ) { opal_output(mca_crcp_coord_component.super.output_handle, - "crcp:coord: check_bookmarks: [%lu,%lu,%lu] --> [%lu,%lu,%lu] " + "crcp:coord: check_bookmarks: %s --> %s " "Sent Msgs (%4d) = Received Msgs (%4d) => Diff (%4d). " " WARNING: I received more than the peer sent. :(\n", - ORTE_NAME_ARGS(orte_process_info.my_name), - ORTE_NAME_ARGS(&(peer_ref->proc_name)), + ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(&(peer_ref->proc_name)), p_n_to_p_m, p_n_from_p_m, (p_n_to_p_m - p_n_from_p_m) @@ -3311,10 +3289,10 @@ static int ft_event_check_bookmarks(void) * so need to coordinate with peer. */ if( p_n_to_p_m > p_n_from_p_m) { opal_output_verbose(10, mca_crcp_coord_component.super.output_handle, - "crcp:coord: check_bookmarks: [%lu,%lu,%lu] <-- [%lu,%lu,%lu] " + "crcp:coord: check_bookmarks: %s <-- %s " "Received Msgs (%4d) = Sent Msgs (%4d). 
I need %4d.\n", - ORTE_NAME_ARGS(orte_process_info.my_name), - ORTE_NAME_ARGS(&(peer_ref->proc_name)), + ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(&(peer_ref->proc_name)), p_n_to_p_m, p_n_from_p_m, (p_n_to_p_m - p_n_from_p_m) @@ -3325,8 +3303,8 @@ static int ft_event_check_bookmarks(void) */ if( OMPI_SUCCESS != (ret = recv_msg_details(peer_ref, p_n_to_p_m, p_n_from_p_m) ) ) { opal_output(mca_crcp_coord_component.super.output_handle, - "crcp:coord: check_bookmarks: Unable to recv message details from peer [%lu,%lu,%lu]: Return %d\n", - ORTE_NAME_ARGS(&peer_ref->proc_name), + "crcp:coord: check_bookmarks: Unable to recv message details from peer %s: Return %d\n", + ORTE_NAME_PRINT(&peer_ref->proc_name), ret); return ret; } @@ -3348,11 +3326,11 @@ static int ft_event_check_bookmarks(void) /* M_Send >= T_Recv */ if( p_n_to_p_m < p_n_from_p_m ) { opal_output(mca_crcp_coord_component.super.output_handle, - "crcp:coord: check_bookmarks: [%lu,%lu,%lu] --> [%lu,%lu,%lu] " + "crcp:coord: check_bookmarks: %s --> %s " "Sent Msgs (%4d) = Received Msgs (%4d) => Diff (%4d). " " WARNING: I received more than the peer sent. :(\n", - ORTE_NAME_ARGS(orte_process_info.my_name), - ORTE_NAME_ARGS(&(peer_ref->proc_name)), + ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(&(peer_ref->proc_name)), p_n_to_p_m, p_n_from_p_m, (p_n_to_p_m - p_n_from_p_m) @@ -3363,10 +3341,10 @@ static int ft_event_check_bookmarks(void) * so need to coordinate with peer. */ if( p_n_to_p_m > p_n_from_p_m) { opal_output_verbose(10, mca_crcp_coord_component.super.output_handle, - "crcp:coord: check_bookmarks: [%lu,%lu,%lu] <-- [%lu,%lu,%lu] " + "crcp:coord: check_bookmarks: %s <-- %s " "Received Msgs (%4d) = Sent Msgs (%4d). 
I need %4d.\n", - ORTE_NAME_ARGS(orte_process_info.my_name), - ORTE_NAME_ARGS(&(peer_ref->proc_name)), + ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(&(peer_ref->proc_name)), p_n_to_p_m, p_n_from_p_m, (p_n_to_p_m - p_n_from_p_m) @@ -3377,8 +3355,8 @@ static int ft_event_check_bookmarks(void) */ if( OMPI_SUCCESS != (ret = recv_msg_details(peer_ref, p_n_to_p_m, p_n_from_p_m) ) ) { opal_output(mca_crcp_coord_component.super.output_handle, - "crcp:coord: check_bookmarks: Unable to recv message details from peer [%lu,%lu,%lu]: Return %d\n", - ORTE_NAME_ARGS(&peer_ref->proc_name), + "crcp:coord: check_bookmarks: Unable to recv message details from peer %s: Return %d\n", + ORTE_NAME_PRINT(&peer_ref->proc_name), ret); return ret; } @@ -3398,11 +3376,11 @@ static int ft_event_check_bookmarks(void) /* T_Send >= M_Recv */ if( p_n_to_p_m < p_n_from_p_m ) { opal_output(mca_crcp_coord_component.super.output_handle, - "crcp:coord: check_bookmarks: [%lu,%lu,%lu] --> [%lu,%lu,%lu] " + "crcp:coord: check_bookmarks: %s --> %s " "Sent Msgs (%4d) = Received Msgs (%4d) => Diff (%4d). " " WARNING: Peer received more than was sent. :(\n", - ORTE_NAME_ARGS(orte_process_info.my_name), - ORTE_NAME_ARGS(&(peer_ref->proc_name)), + ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(&(peer_ref->proc_name)), p_n_to_p_m, p_n_from_p_m, (p_n_to_p_m - p_n_from_p_m) @@ -3413,10 +3391,10 @@ static int ft_event_check_bookmarks(void) * so need to coordinate with peer. */ if( p_n_to_p_m > p_n_from_p_m) { opal_output_verbose(10, mca_crcp_coord_component.super.output_handle, - "crcp:coord: check_bookmarks: [%lu,%lu,%lu] --> [%lu,%lu,%lu] " + "crcp:coord: check_bookmarks: %s --> %s " "Sent Msgs (%4d) = Received Msgs (%4d). 
Peer needs %4d.\n", - ORTE_NAME_ARGS(orte_process_info.my_name), - ORTE_NAME_ARGS(&(peer_ref->proc_name)), + ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(&(peer_ref->proc_name)), p_n_to_p_m, p_n_from_p_m, (p_n_to_p_m - p_n_from_p_m) @@ -3428,8 +3406,8 @@ static int ft_event_check_bookmarks(void) */ if( OMPI_SUCCESS != (ret = send_msg_details(peer_ref, p_n_to_p_m, p_n_from_p_m) ) ) { opal_output(mca_crcp_coord_component.super.output_handle, - "crcp:coord: check_bookmarks: Unable to send message details to peer [%lu,%lu,%lu]: Return %d\n", - ORTE_NAME_ARGS(&peer_ref->proc_name), + "crcp:coord: check_bookmarks: Unable to send message details to peer %s: Return %d\n", + ORTE_NAME_PRINT(&peer_ref->proc_name), ret); return ret; } @@ -3462,8 +3440,8 @@ static int ft_event_post_drain_acks(void) } opal_output_verbose(10, mca_crcp_coord_component.super.output_handle, - "crcp:coord: post_drain_ack: [%lu,%lu,%lu] Wait on %d Drain ACK Messages.\n", - ORTE_NAME_ARGS(orte_process_info.my_name), + "crcp:coord: post_drain_ack: %s Wait on %d Drain ACK Messages.\n", + ORTE_NAME_PRINT(orte_process_info.my_name), (int)req_size); /* @@ -3482,8 +3460,8 @@ static int ft_event_post_drain_acks(void) drain_message_ack_cbfunc, NULL) ) ) { opal_output(mca_crcp_coord_component.super.output_handle, - "crcp:coord: post_drain_acks: [%lu,%lu,%lu] Failed to post a RML receive to the peer\n", - ORTE_NAME_ARGS(orte_process_info.my_name)); + "crcp:coord: post_drain_acks: %s Failed to post a RML receive to the peer\n", + ORTE_NAME_PRINT(orte_process_info.my_name)); return ret; } } @@ -3518,24 +3496,23 @@ static void drain_message_ack_cbfunc(int status, /* If this ACK has not completed yet */ if(!drain_msg_ack->complete) { /* If it is the correct peer */ - if(drain_msg_ack->peer.cellid == sender->cellid && - drain_msg_ack->peer.jobid == sender->jobid && + if(drain_msg_ack->peer.jobid == sender->jobid && drain_msg_ack->peer.vpid == sender->vpid ) { /* We found it! 
*/ drain_msg_ack->complete = true; opal_output_verbose(5, mca_crcp_coord_component.super.output_handle, - "crcp:coord: drain_message_ack_cbfunc: [%lu,%lu,%lu] --> [%lu,%lu,%lu] Received ACK of FLUSH from peer\n", - ORTE_NAME_ARGS(orte_process_info.my_name), - ORTE_NAME_ARGS(sender) ); + "crcp:coord: drain_message_ack_cbfunc: %s --> %s Received ACK of FLUSH from peer\n", + ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(sender) ); return; } } } opal_output(mca_crcp_coord_component.super.output_handle, - "crcp:coord: drain_message_ack_cbfunc: [%lu,%lu,%lu] --> [%lu,%lu,%lu] ERROR: Uable to match ACK to peer\n", - ORTE_NAME_ARGS(orte_process_info.my_name), - ORTE_NAME_ARGS(sender) ); + "crcp:coord: drain_message_ack_cbfunc: %s --> %s ERROR: Uable to match ACK to peer\n", + ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(sender) ); cleanup: return; @@ -3553,8 +3530,8 @@ static int ft_event_post_drained(void) } opal_output_verbose(10, mca_crcp_coord_component.super.output_handle, - "crcp:coord: post_drained: [%lu,%lu,%lu] Draining %d Messages.\n", - ORTE_NAME_ARGS(orte_process_info.my_name), + "crcp:coord: post_drained: %s Draining %d Messages.\n", + ORTE_NAME_PRINT(orte_process_info.my_name), (int)req_size); /* @@ -3574,8 +3551,8 @@ static int ft_event_post_drained(void) */ if( drain_msg->already_posted ) { opal_output_verbose(15, mca_crcp_coord_component.super.output_handle, - "crcp:coord: post_drained: [%lu,%lu,%lu] Found a message that we don't need to post.\n", - ORTE_NAME_ARGS(orte_process_info.my_name)); + "crcp:coord: post_drained: %s Found a message that we don't need to post.\n", + ORTE_NAME_PRINT(orte_process_info.my_name)); continue; } /* @@ -3583,8 +3560,8 @@ static int ft_event_post_drained(void) */ else { opal_output_verbose(15, mca_crcp_coord_component.super.output_handle, - "crcp:coord: post_drained: [%lu,%lu,%lu] Posting a message to be drained from %d.\n", - ORTE_NAME_ARGS(orte_process_info.my_name), + "crcp:coord: 
post_drained: %s Posting a message to be drained from %d.\n", + ORTE_NAME_PRINT(orte_process_info.my_name), drain_msg->rank); if( OMPI_SUCCESS != (ret = wrapped_pml_module->pml_irecv(drain_msg->buffer, (drain_msg->count * drain_msg->ddt_size), @@ -3594,8 +3571,8 @@ static int ft_event_post_drained(void) drain_msg->comm, &(drain_msg->request) ) ) ) { opal_output(mca_crcp_coord_component.super.output_handle, - "crcp:coord: post_drained: [%lu,%lu,%lu] Failed to post the Draining PML iRecv\n", - ORTE_NAME_ARGS(orte_process_info.my_name) ); + "crcp:coord: post_drained: %s Failed to post the Draining PML iRecv\n", + ORTE_NAME_PRINT(orte_process_info.my_name) ); return ret; } } @@ -3614,8 +3591,8 @@ static int ft_event_wait_quiesce(void) **********************************************/ if( OMPI_SUCCESS != (ret = wait_quiesce_drained() ) ) { opal_output(mca_crcp_coord_component.super.output_handle, - "crcp:coord: wait_quiesce: [%lu,%lu,%lu] Failed to quiesce drained messages\n", - ORTE_NAME_ARGS(orte_process_info.my_name) ); + "crcp:coord: wait_quiesce: %s Failed to quiesce drained messages\n", + ORTE_NAME_PRINT(orte_process_info.my_name) ); exit_status = ret; goto cleanup; } @@ -3625,8 +3602,8 @@ static int ft_event_wait_quiesce(void) *******************************************************************/ if( OMPI_SUCCESS != (ret = wait_quiesce_drain_ack() ) ) { opal_output(mca_crcp_coord_component.super.output_handle, - "crcp:coord: wait_quiesce: [%lu,%lu,%lu] Failed to recv all drain ACKs\n", - ORTE_NAME_ARGS(orte_process_info.my_name) ); + "crcp:coord: wait_quiesce: %s Failed to recv all drain ACKs\n", + ORTE_NAME_PRINT(orte_process_info.my_name) ); exit_status = ret; goto cleanup; } @@ -3658,8 +3635,8 @@ static int wait_quiesce_drained(void) } opal_output_verbose(5, mca_crcp_coord_component.super.output_handle, - "crcp:coord: wait_quiesce_drained: [%lu,%lu,%lu] Waiting on %d messages to drain\n", - ORTE_NAME_ARGS(orte_process_info.my_name), + "crcp:coord: 
wait_quiesce_drained: %s Waiting on %d messages to drain\n", + ORTE_NAME_PRINT(orte_process_info.my_name), (int)req_size); /* @@ -3690,7 +3667,6 @@ static int wait_quiesce_drained(void) wait_any_requests[i] = &ompi_request_null; wait_any_status[i] = &ompi_status_empty; - proc_names[i].cellid = ORTE_CELLID_INVALID; proc_names[i].jobid = ORTE_JOBID_INVALID; proc_names[i].vpid = ORTE_VPID_INVALID; } @@ -3714,15 +3690,15 @@ static int wait_quiesce_drained(void) */ if( drain_msg->already_posted && NULL == drain_msg->request) { opal_output_verbose(10, mca_crcp_coord_component.super.output_handle, - "crcp:coord: wait_quiesce_drained: [%lu,%lu,%lu] - [%lu,%lu,%lu] Already posted this msg.\n", - ORTE_NAME_ARGS(orte_process_info.my_name), - ORTE_NAME_ARGS(&(drain_msg->proc_name)) ); + "crcp:coord: wait_quiesce_drained: %s - %s Already posted this msg.\n", + ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(&(drain_msg->proc_name)) ); } else { opal_output_verbose(15, mca_crcp_coord_component.super.output_handle, - "crcp:coord: wait_quiesce_drained: [%lu,%lu,%lu] - [%lu,%lu,%lu] Waiting on message. (index = %d)\n", - ORTE_NAME_ARGS(orte_process_info.my_name), - ORTE_NAME_ARGS(&(drain_msg->proc_name)), + "crcp:coord: wait_quiesce_drained: %s - %s Waiting on message. 
(index = %d)\n", + ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(&(drain_msg->proc_name)), (int)wait_any_count); wait_any_requests[wait_any_count] = drain_msg->request; @@ -3736,8 +3712,7 @@ static int wait_quiesce_drained(void) /* Add proc to response queue if it is not already there */ found = false; for(i = 0; i < last_proc_idx; ++i) { - if(proc_names[i].cellid == drain_msg->proc_name.cellid && - proc_names[i].jobid == drain_msg->proc_name.jobid && + if(proc_names[i].jobid == drain_msg->proc_name.jobid && proc_names[i].vpid == drain_msg->proc_name.vpid ) { found = true; break; @@ -3745,12 +3720,11 @@ static int wait_quiesce_drained(void) } if( !found ) { opal_output_verbose(15, mca_crcp_coord_component.super.output_handle, - "crcp:coord: wait_quiesce: [%lu,%lu,%lu] - [%lu,%lu,%lu] Add process to response list [idx %d]\n", - ORTE_NAME_ARGS(orte_process_info.my_name), - ORTE_NAME_ARGS(&(drain_msg->proc_name)), + "crcp:coord: wait_quiesce: %s - %s Add process to response list [idx %d]\n", + ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(&(drain_msg->proc_name)), (int)last_proc_idx); - proc_names[last_proc_idx].cellid = drain_msg->proc_name.cellid; proc_names[last_proc_idx].jobid = drain_msg->proc_name.jobid; proc_names[last_proc_idx].vpid = drain_msg->proc_name.vpid; last_proc_idx++; @@ -3774,8 +3748,8 @@ static int wait_quiesce_drained(void) * Send ACKs to all peers */ opal_output_verbose(5, mca_crcp_coord_component.super.output_handle, - "crcp:coord: wait_quiesce: [%lu,%lu,%lu] Send ACKs to all Peers\n", - ORTE_NAME_ARGS(orte_process_info.my_name)); + "crcp:coord: wait_quiesce: %s Send ACKs to all Peers\n", + ORTE_NAME_PRINT(orte_process_info.my_name)); for(i = 0; i < last_proc_idx; ++i) { orte_buffer_t *buffer = NULL; @@ -3854,8 +3828,8 @@ static int coord_request_wait_all( size_t count, coord_request_wait(req, status); opal_output_verbose(15, mca_crcp_coord_component.super.output_handle, - "crcp:coord: request_wait_all: 
[%lu,%lu,%lu] Done with idx %d of %d\n", - ORTE_NAME_ARGS(orte_process_info.my_name), + "crcp:coord: request_wait_all: %s Done with idx %d of %d\n", + ORTE_NAME_PRINT(orte_process_info.my_name), (int)i, (int)count); } @@ -3897,8 +3871,8 @@ static int wait_quiesce_drain_ack(void) } opal_output_verbose(10, mca_crcp_coord_component.super.output_handle, - "crcp:coord: wait_quiesce_drain_ack: [%lu,%lu,%lu] Waiting on %d Drain ACK messages\n", - ORTE_NAME_ARGS(orte_process_info.my_name), + "crcp:coord: wait_quiesce_drain_ack: %s Waiting on %d Drain ACK messages\n", + ORTE_NAME_PRINT(orte_process_info.my_name), num_outstanding); while(0 < num_outstanding) { @@ -3939,7 +3913,6 @@ static int send_bookmarks(int peer_idx) /* * Find the peer structure for this peer */ - peer_name.cellid = orte_process_info.my_name->cellid; peer_name.jobid = orte_process_info.my_name->jobid; peer_name.vpid = peer_idx; @@ -3952,9 +3925,9 @@ static int send_bookmarks(int peer_idx) } opal_output_verbose(15, mca_crcp_coord_component.super.output_handle, - "crcp:coord: send_bookmarks: [%lu,%lu,%lu] -> [%lu,%lu,%lu] Sending bookmark S[%4d,%4d,%4d] R[%4d,%4d,%4d]\n", - ORTE_NAME_ARGS(orte_process_info.my_name), - ORTE_NAME_ARGS(&peer_name), + "crcp:coord: send_bookmarks: %s -> %s Sending bookmark S[%4d,%4d,%4d] R[%4d,%4d,%4d]\n", + ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(&peer_name), peer_ref->total_send_msgs, peer_ref->total_isend_msgs, peer_ref->total_send_init_msgs, @@ -3986,8 +3959,8 @@ static int send_bookmarks(int peer_idx) if ( 0 > ( ret = orte_rml.send_buffer(&peer_name, buffer, OMPI_CRCP_COORD_BOOKMARK_TAG, 0)) ) { opal_output(mca_crcp_coord_component.super.output_handle, - "crcp:coord: send_bookmarks: Failed to send bookmark to peer [%lu,%lu,%lu]: Return %d\n", - ORTE_NAME_ARGS(&peer_name), + "crcp:coord: send_bookmarks: Failed to send bookmark to peer %s: Return %d\n", + ORTE_NAME_PRINT(&peer_name), ret); exit_status = ret; goto cleanup; @@ -4013,7 +3986,6 @@ static 
int recv_bookmarks(int peer_idx) /* * Find the peer structure for this peer */ - peer_name.cellid = orte_process_info.my_name->cellid; peer_name.jobid = orte_process_info.my_name->jobid; peer_name.vpid = peer_idx; @@ -4035,8 +4007,8 @@ static int recv_bookmarks(int peer_idx) if ( 0 > (ret = orte_rml.recv_buffer(&peer_name, buffer, OMPI_CRCP_COORD_BOOKMARK_TAG, 0) ) , 0) { opal_output(mca_crcp_coord_component.super.output_handle, - "crcp:coord: recv_bookmarks: Failed to receive bookmark from peer [%lu,%lu,%lu]: Return %d\n", - ORTE_NAME_ARGS(&peer_name), + "crcp:coord: recv_bookmarks: Failed to receive bookmark from peer %s: Return %d\n", + ORTE_NAME_PRINT(&peer_name), ret); exit_status = ret; goto cleanup; @@ -4063,9 +4035,9 @@ static int recv_bookmarks(int peer_idx) peer_ref->matched_recv_init_msgs = tmp_int; opal_output_verbose(15, mca_crcp_coord_component.super.output_handle, - "crcp:coord: recv_bookmarks: [%lu,%lu,%lu] <- [%lu,%lu,%lu] Received bookmark S[%4d,%4d,%4d] R[%4d,%4d,%4d]\n", - ORTE_NAME_ARGS(orte_process_info.my_name), - ORTE_NAME_ARGS(&peer_name), + "crcp:coord: recv_bookmarks: %s <- %s Received bookmark S[%4d,%4d,%4d] R[%4d,%4d,%4d]\n", + ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(&peer_name), peer_ref->matched_send_msgs, peer_ref->matched_isend_msgs, peer_ref->matched_send_init_msgs, @@ -4119,9 +4091,9 @@ static int send_msg_details(ompi_crcp_coord_pml_peer_ref_t *peer_ref, found_match = false; if(OMPI_SUCCESS != (ret = do_send_msg_detail(peer_ref, msg_ref, &found_match, &finished)) ) { opal_output(mca_crcp_coord_component.super.output_handle, - "crcp:coord: send_msg_details: [%lu,%lu,%lu] --> [%lu,%lu,%lu] Failed to send message details to peer. Return %d\n", - ORTE_NAME_ARGS(orte_process_info.my_name), - ORTE_NAME_ARGS(&(peer_ref->proc_name)), + "crcp:coord: send_msg_details: %s --> %s Failed to send message details to peer. 
Return %d\n", + ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(&(peer_ref->proc_name)), ret); } if(found_match) { @@ -4165,15 +4137,14 @@ static int send_msg_details(ompi_crcp_coord_pml_peer_ref_t *peer_ref, * inflight messages into a local buffer */ d_msg_ack = OBJ_NEW(drain_msg_ack_ref_t); - d_msg_ack->peer.cellid = peer_ref->proc_name.cellid; d_msg_ack->peer.jobid = peer_ref->proc_name.jobid; d_msg_ack->peer.vpid = peer_ref->proc_name.vpid; d_msg_ack->complete = false; opal_list_append(&drained_msg_ack_list, &(d_msg_ack->super)); opal_output_verbose(10, mca_crcp_coord_component.super.output_handle, - "crcp:coord: send_msg_details: [%lu,%lu,%lu] <--> [%lu,%lu,%lu] Will wait on ACK from this peer.\n", - ORTE_NAME_ARGS(orte_process_info.my_name), - ORTE_NAME_ARGS(&(peer_ref->proc_name))); + "crcp:coord: send_msg_details: %s <--> %s Will wait on ACK from this peer.\n", + ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(&(peer_ref->proc_name))); /* * If we know that we are in the middle of a blocking send/recv then we @@ -4243,8 +4214,8 @@ static int do_send_msg_detail(ompi_crcp_coord_pml_peer_ref_t *peer_ref, if ( 0 > ( ret = orte_rml.send_buffer(&peer_ref->proc_name, buffer, OMPI_CRCP_COORD_BOOKMARK_TAG, 0)) ) { opal_output(mca_crcp_coord_component.super.output_handle, - "crcp:coord: do_send_msg_detail: Unable to send message details to peer [%lu,%lu,%lu]: Return %d\n", - ORTE_NAME_ARGS(&peer_ref->proc_name), + "crcp:coord: do_send_msg_detail: Unable to send message details to peer %s: Return %d\n", + ORTE_NAME_PRINT(&peer_ref->proc_name), ret); exit_status = OMPI_ERROR; @@ -4270,9 +4241,9 @@ static int do_send_msg_detail(ompi_crcp_coord_pml_peer_ref_t *peer_ref, if ( 0 > (ret = orte_rml.recv_buffer(&peer_ref->proc_name, buffer, OMPI_CRCP_COORD_BOOKMARK_TAG, 0) ) ) { opal_output(mca_crcp_coord_component.super.output_handle, - "crcp:coord: do_send_msg_detail: [%lu,%lu,%lu] --> [%lu,%lu,%lu] Failed to receive ACK buffer from peer. 
Return %d\n", - ORTE_NAME_ARGS(orte_process_info.my_name), - ORTE_NAME_ARGS(&(peer_ref->proc_name)), + "crcp:coord: do_send_msg_detail: %s --> %s Failed to receive ACK buffer from peer. Return %d\n", + ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(&(peer_ref->proc_name)), ret); exit_status = ret; goto cleanup; @@ -4341,10 +4312,10 @@ static int recv_msg_details(ompi_crcp_coord_pml_peer_ref_t *peer_ref, &p_tag, &p_count, &p_datatype_size)) ) { opal_output(mca_crcp_coord_component.super.output_handle, - "crcp:coord: recv_msg_details: [%lu,%lu,%lu] <-- [%lu,%lu,%lu] " + "crcp:coord: recv_msg_details: %s <-- %s " "Failed to receive message detail from peer. Return %d\n", - ORTE_NAME_ARGS(orte_process_info.my_name), - ORTE_NAME_ARGS(&(peer_ref->proc_name)), + ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(&(peer_ref->proc_name)), ret); exit_status = ret; goto cleanup; @@ -4361,10 +4332,10 @@ static int recv_msg_details(ompi_crcp_coord_pml_peer_ref_t *peer_ref, p_datatype_size, &found_match) ) ) { opal_output(mca_crcp_coord_component.super.output_handle, - "crcp:coord: recv_msg_details: [%lu,%lu,%lu] <-- [%lu,%lu,%lu] " + "crcp:coord: recv_msg_details: %s <-- %s " "Failed to check message detail from peer. Return %d\n", - ORTE_NAME_ARGS(orte_process_info.my_name), - ORTE_NAME_ARGS(&(peer_ref->proc_name)), + ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(&(peer_ref->proc_name)), ret); exit_status = ret; goto cleanup; @@ -4389,9 +4360,9 @@ static int recv_msg_details(ompi_crcp_coord_pml_peer_ref_t *peer_ref, if(OMPI_SUCCESS != (ret = do_recv_msg_detail_resp(peer_ref, response))) { opal_output(mca_crcp_coord_component.super.output_handle, - "crcp:coord: recv_msg_details: [%lu,%lu,%lu] <-- [%lu,%lu,%lu] Failed to respond to peer. Return %d\n", - ORTE_NAME_ARGS(orte_process_info.my_name), - ORTE_NAME_ARGS(&(peer_ref->proc_name)), + "crcp:coord: recv_msg_details: %s <-- %s Failed to respond to peer. 
Return %d\n", + ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(&(peer_ref->proc_name)), ret); exit_status = ret; goto cleanup; @@ -4421,9 +4392,9 @@ static int do_recv_msg_detail(ompi_crcp_coord_pml_peer_ref_t *peer_ref, */ if ( 0 > (ret = orte_rml.recv_buffer(&peer_ref->proc_name, buffer, OMPI_CRCP_COORD_BOOKMARK_TAG, 0) ) ) { opal_output(mca_crcp_coord_component.super.output_handle, - "crcp:coord: do_recv_msg_detail: [%lu,%lu,%lu] <-- [%lu,%lu,%lu] Failed to receive buffer from peer. Return %d\n", - ORTE_NAME_ARGS(orte_process_info.my_name), - ORTE_NAME_ARGS(&(peer_ref->proc_name)), + "crcp:coord: do_recv_msg_detail: %s <-- %s Failed to receive buffer from peer. Return %d\n", + ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(&(peer_ref->proc_name)), ret); exit_status = ret; goto cleanup; @@ -4482,20 +4453,20 @@ static int do_recv_msg_detail_check(ompi_crcp_coord_pml_peer_ref_t *peer_ref, &msg_already_posted); /* Has the recv already been posted? */ if( OMPI_SUCCESS != ret) { opal_output(mca_crcp_coord_component.super.output_handle, - "crcp:coord: recv_msg_detail_check: [%lu,%lu,%lu] -- [%lu,%lu,%lu] " + "crcp:coord: recv_msg_detail_check: %s -- %s " "Failed to determine if we have received this message. Return %d\n", - ORTE_NAME_ARGS(orte_process_info.my_name), - ORTE_NAME_ARGS(&(peer_ref->proc_name)), + ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(&(peer_ref->proc_name)), ret); exit_status = ret; goto cleanup; } opal_output_verbose(20, mca_crcp_coord_component.super.output_handle, - "crcp:coord: recv_msg_detail_check: [%lu,%lu,%lu] -- [%lu,%lu,%lu]" + "crcp:coord: recv_msg_detail_check: %s -- %s" " found %s, complete %s, posted %s, peer_rank=[%d vs %d]\n", - ORTE_NAME_ARGS(orte_process_info.my_name), - ORTE_NAME_ARGS(&(peer_ref->proc_name)), + ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(&(peer_ref->proc_name)), (true == msg_found ? "True " : "False"), (true == msg_complete ? 
"True " : "False"), (true == msg_already_posted ? "True " : "False"), @@ -4511,8 +4482,8 @@ static int do_recv_msg_detail_check(ompi_crcp_coord_pml_peer_ref_t *peer_ref, ompi_crcp_coord_pml_message_ref_t *d_msg = NULL; opal_output_verbose(15, mca_crcp_coord_component.super.output_handle, - "crcp:coord: recv_msg_detail_check: [%lu,%lu,%lu] Found a message that needs to be drained\n", - ORTE_NAME_ARGS(orte_process_info.my_name) ); + "crcp:coord: recv_msg_detail_check: %s Found a message that needs to be drained\n", + ORTE_NAME_PRINT(orte_process_info.my_name) ); /* * Construct a message for draining @@ -4522,7 +4493,6 @@ static int do_recv_msg_detail_check(ompi_crcp_coord_pml_peer_ref_t *peer_ref, 0, NULL, /* Setup the datatype outside of this */ tag, rank, ompi_comm_lookup(comm_id), NULL, - peer_ref->proc_name.cellid, peer_ref->proc_name.jobid, peer_ref->proc_name.vpid); /* @@ -4570,9 +4540,9 @@ static int do_recv_msg_detail_check(ompi_crcp_coord_pml_peer_ref_t *peer_ref, ompi_crcp_coord_pml_message_ref_t *d_msg = NULL; opal_output_verbose(10, mca_crcp_coord_component.super.output_handle, - "crcp:coord: recv_msg_detail_check: [%lu,%lu,%lu] " + "crcp:coord: recv_msg_detail_check: %s " "Found a message already posted! Prepare to drain.\n", - ORTE_NAME_ARGS(orte_process_info.my_name)); + ORTE_NAME_PRINT(orte_process_info.my_name)); /* * If this is the current blocking recv, @@ -4581,9 +4551,9 @@ static int do_recv_msg_detail_check(ompi_crcp_coord_pml_peer_ref_t *peer_ref, if( current_msg_id == posted_msg_ref->msg_id && COORD_MSG_TYPE_B_RECV == posted_msg_ref->msg_type) { opal_output_verbose(10, mca_crcp_coord_component.super.output_handle, - "crcp:coord: recv_msg_detail_check: [%lu,%lu,%lu] " + "crcp:coord: recv_msg_detail_check: %s " "Found a message already posted! 
Prepare to STALL.\n", - ORTE_NAME_ARGS(orte_process_info.my_name)); + ORTE_NAME_PRINT(orte_process_info.my_name)); stall_for_completion = true; } /* @@ -4592,9 +4562,9 @@ static int do_recv_msg_detail_check(ompi_crcp_coord_pml_peer_ref_t *peer_ref, */ else { opal_output_verbose(10, mca_crcp_coord_component.super.output_handle, - "crcp:coord: recv_msg_detail_check: [%lu,%lu,%lu] " + "crcp:coord: recv_msg_detail_check: %s " "Found a message already posted! No stall required [%3d, %3d, %3d, %3d].\n", - ORTE_NAME_ARGS(orte_process_info.my_name), + ORTE_NAME_PRINT(orte_process_info.my_name), (int)current_msg_id, (int)current_msg_type, (int)posted_msg_ref->msg_id, @@ -4626,7 +4596,6 @@ static int do_recv_msg_detail_check(ompi_crcp_coord_pml_peer_ref_t *peer_ref, count, NULL, tag, rank, ompi_comm_lookup(comm_id), posted_msg_ref->request, - peer_ref->proc_name.cellid, peer_ref->proc_name.jobid, peer_ref->proc_name.vpid); @@ -4642,8 +4611,8 @@ static int do_recv_msg_detail_check(ompi_crcp_coord_pml_peer_ref_t *peer_ref, } else { opal_output(mca_crcp_coord_component.super.output_handle, - "crcp:coord: recv_msg_detail_check: ***** ERROR ***** [%lu,%lu,%lu] Failed to find an action to use. This should never happen!\n", - ORTE_NAME_ARGS(orte_process_info.my_name)); + "crcp:coord: recv_msg_detail_check: ***** ERROR ***** %s Failed to find an action to use. 
This should never happen!\n", + ORTE_NAME_PRINT(orte_process_info.my_name)); exit_status = OMPI_ERROR; goto cleanup; } @@ -4929,8 +4898,8 @@ static int do_recv_msg_detail_resp(ompi_crcp_coord_pml_peer_ref_t *peer_ref, if ( 0 > ( ret = orte_rml.send_buffer(&peer_ref->proc_name, buffer, OMPI_CRCP_COORD_BOOKMARK_TAG, 0)) ) { opal_output(mca_crcp_coord_component.super.output_handle, - "crcp:coord: recv_msg_detail_resp: Unable to send message detail response to peer [%lu,%lu,%lu]: Return %d\n", - ORTE_NAME_ARGS(&peer_ref->proc_name), + "crcp:coord: recv_msg_detail_resp: Unable to send message detail response to peer %s: Return %d\n", + ORTE_NAME_PRINT(&peer_ref->proc_name), ret); exit_status = OMPI_ERROR; goto cleanup; @@ -4988,7 +4957,6 @@ static int coord_basic_barrier_send(int peer_idx) /* * Find the peer structure for this peer */ - peer_name.cellid = orte_process_info.my_name->cellid; peer_name.jobid = orte_process_info.my_name->jobid; peer_name.vpid = peer_idx; @@ -5006,8 +4974,8 @@ static int coord_basic_barrier_send(int peer_idx) /* JJH -- Really Establish TAG in rml_types.h */ if ( 0 > ( ret = orte_rml.send_buffer(&peer_name, buffer, OMPI_CRCP_COORD_BOOKMARK_TAG+1, 0)) ) { opal_output(mca_crcp_coord_component.super.output_handle, - "crcp:coord: coord_basic_barrier_send: Failed to send ACK to peer [%lu,%lu,%lu]: Return %d\n", - ORTE_NAME_ARGS(&peer_name), + "crcp:coord: coord_basic_barrier_send: Failed to send ACK to peer %s: Return %d\n", + ORTE_NAME_PRINT(&peer_name), ret); exit_status = ret; goto cleanup; @@ -5033,7 +5001,6 @@ static int coord_basic_barrier_recv(int peer_idx) /* * Find the peer structure for this peer */ - peer_name.cellid = orte_process_info.my_name->cellid; peer_name.jobid = orte_process_info.my_name->jobid; peer_name.vpid = peer_idx; @@ -5047,8 +5014,8 @@ static int coord_basic_barrier_recv(int peer_idx) if ( 0 > (ret = orte_rml.recv_buffer(&peer_name, buffer, OMPI_CRCP_COORD_BOOKMARK_TAG+1, 0) ) ) { 
opal_output(mca_crcp_coord_component.super.output_handle, - "crcp:coord: recv_bookmarks: Failed to receive bookmark from peer [%lu,%lu,%lu]: Return %d\n", - ORTE_NAME_ARGS(&peer_name), + "crcp:coord: recv_bookmarks: Failed to receive bookmark from peer %s: Return %d\n", + ORTE_NAME_PRINT(&peer_name), ret); exit_status = ret; goto cleanup; diff --git a/ompi/mca/mpool/rdma/mpool_rdma_module.c b/ompi/mca/mpool/rdma/mpool_rdma_module.c index 8578e6d91a..2421d7d53b 100644 --- a/ompi/mca/mpool/rdma/mpool_rdma_module.c +++ b/ompi/mca/mpool/rdma/mpool_rdma_module.c @@ -400,9 +400,9 @@ int mca_mpool_rdma_release_memory(struct mca_mpool_base_module_t *mpool, void mca_mpool_rdma_finalize(struct mca_mpool_base_module_t *mpool) { mca_mpool_rdma_module_t *mpool_rdma = (mca_mpool_rdma_module_t*)mpool; - opal_output(0, "[%lu,%lu,%lu] rdma: stats " + opal_output(0, "%s rdma: stats " "(hit/miss/found/not found/evicted): %d/%d/%d/%d/%d\n", - ORTE_NAME_ARGS(orte_process_info.my_name), + ORTE_NAME_PRINT(orte_process_info.my_name), mpool_rdma->stat_cache_hit, mpool_rdma->stat_cache_miss, mpool_rdma->stat_cache_found, mpool_rdma->stat_cache_notfound, mpool_rdma->stat_evicted); diff --git a/ompi/mca/pml/base/pml_base_select.c b/ompi/mca/pml/base/pml_base_select.c index 63dcc64335..af33cc2c3a 100644 --- a/ompi/mca/pml/base/pml_base_select.c +++ b/ompi/mca/pml/base/pml_base_select.c @@ -318,14 +318,14 @@ mca_pml_base_pml_check_selected(const char *my_pml, if ((size != strlen(my_pml) + 1) || (0 != strcmp(my_pml, remote_pml))) { if (procs[i]->proc_hostname) { - opal_output(0, "[%lu,%lu,%lu] selected pml %s, but peer [%lu,%lu,%lu] on %s selected pml %s", - ORTE_NAME_ARGS(&ompi_proc_local()->proc_name), - my_pml, ORTE_NAME_ARGS(&procs[i]->proc_name), + opal_output(0, "%s selected pml %s, but peer %s on %s selected pml %s", + ORTE_NAME_PRINT(&ompi_proc_local()->proc_name), + my_pml, ORTE_NAME_PRINT(&procs[i]->proc_name), procs[i]->proc_hostname, remote_pml); } else { - opal_output(0, 
"[%lu,%lu,%lu] selected pml %s, but peer [%lu,%lu,%lu] selected pml %s", - ORTE_NAME_ARGS(&ompi_proc_local()->proc_name), - my_pml, ORTE_NAME_ARGS(&procs[i]->proc_name), + opal_output(0, "%s selected pml %s, but peer %s selected pml %s", + ORTE_NAME_PRINT(&ompi_proc_local()->proc_name), + my_pml, ORTE_NAME_PRINT(&procs[i]->proc_name), remote_pml); } return OMPI_ERR_UNREACH; diff --git a/ompi/proc/proc.c b/ompi/proc/proc.c index e28e531c94..a03c559bd0 100644 --- a/ompi/proc/proc.c +++ b/ompi/proc/proc.c @@ -291,7 +291,7 @@ ompi_proc_t * ompi_proc_find ( const orte_process_name_t * name ) orte_ns_cmp_bitmask_t mask; /* return the proc-struct which matches this jobid+process id */ - mask = ORTE_NS_CMP_CELLID | ORTE_NS_CMP_JOBID | ORTE_NS_CMP_VPID; + mask = ORTE_NS_CMP_JOBID | ORTE_NS_CMP_VPID; OPAL_THREAD_LOCK(&ompi_proc_lock); for(proc = (ompi_proc_t*)opal_list_get_first(&ompi_proc_list); proc != (ompi_proc_t*)opal_list_get_end(&ompi_proc_list); @@ -314,7 +314,7 @@ ompi_proc_find_and_add(const orte_process_name_t * name, bool* isnew) orte_ns_cmp_bitmask_t mask; /* return the proc-struct which matches this jobid+process id */ - mask = ORTE_NS_CMP_CELLID | ORTE_NS_CMP_JOBID | ORTE_NS_CMP_VPID; + mask = ORTE_NS_CMP_JOBID | ORTE_NS_CMP_VPID; OPAL_THREAD_LOCK(&ompi_proc_lock); for(proc = (ompi_proc_t*)opal_list_get_first(&ompi_proc_list); proc != (ompi_proc_t*)opal_list_get_end(&ompi_proc_list); @@ -549,7 +549,7 @@ static void callback(orte_gpr_notify_data_t *data, void *cbdata) OPAL_THREAD_LOCK(&ompi_proc_lock); /* loop over the data returned in the subscription */ - mask = ORTE_NS_CMP_CELLID | ORTE_NS_CMP_JOBID | ORTE_NS_CMP_VPID; + mask = ORTE_NS_CMP_JOBID | ORTE_NS_CMP_VPID; value = (orte_gpr_value_t**)(data->values)->addr; for (i = 0, k=0; k < data->cnt && i < (data->values)->size; ++i) { diff --git a/ompi/runtime/ompi_mpi_init.c b/ompi/runtime/ompi_mpi_init.c index a0659b1de0..8b6f99a0c5 100644 --- a/ompi/runtime/ompi_mpi_init.c +++ b/ompi/runtime/ompi_mpi_init.c @@ 
-805,8 +805,8 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided) ompi_mpi_initialized = true; if (orte_debug_flag) { - opal_output(0, "[%lu,%lu,%lu] ompi_mpi_init completed", - ORTE_NAME_ARGS(orte_process_info.my_name)); + opal_output(0, "%s ompi_mpi_init completed", + ORTE_NAME_PRINT(orte_process_info.my_name)); } /* Do we need to wait for a TotalView-like debugger? */ diff --git a/orte/class/orte_proc_table.c b/orte/class/orte_proc_table.c index 05258cf935..aa566ab984 100644 --- a/orte/class/orte_proc_table.c +++ b/orte/class/orte_proc_table.c @@ -46,7 +46,7 @@ static OBJ_CLASS_INSTANCE( NULL); #define GET_KEY(proc) \ - ( (((uint32_t) proc->cellid) << 24) + (((uint32_t) proc->jobid) << 16) + ((uint32_t) proc->vpid) ) + ( (((uint32_t) proc->jobid) << 24) + ((uint32_t) proc->vpid) ) void* orte_hash_table_get_proc(opal_hash_table_t* ht, const orte_process_name_t* proc) diff --git a/orte/include/orte/orte_types.h b/orte/include/orte/orte_types.h index dc6b6a4096..2fd97a906f 100644 --- a/orte/include/orte/orte_types.h +++ b/orte/include/orte/orte_types.h @@ -92,48 +92,44 @@ typedef void* orte_iov_base_ptr_t; #define ORTE_NAME (orte_data_type_t) 22 /**< an orte_process_name_t */ #define ORTE_VPID (orte_data_type_t) 23 /**< a vpid */ #define ORTE_JOBID (orte_data_type_t) 24 /**< a jobid */ -#define ORTE_PSET (orte_data_type_t) 25 /**< a process set */ -#define ORTE_CELLID (orte_data_type_t) 26 /**< a cellid */ -#define ORTE_NODEID (orte_data_type_t) 27 /**< a node id */ +#define ORTE_NODEID (orte_data_type_t) 25 /**< a node id */ /* SMR types */ -#define ORTE_NODE_STATE (orte_data_type_t) 28 /**< node status flag */ -#define ORTE_PROC_STATE (orte_data_type_t) 29 /**< process/resource status */ -#define ORTE_PSET_STATE (orte_data_type_t) 30 /**< process set state */ -#define ORTE_JOB_STATE (orte_data_type_t) 31 /**< job status flag */ -#define ORTE_EXIT_CODE (orte_data_type_t) 32 /**< process exit code */ +#define ORTE_NODE_STATE (orte_data_type_t) 
26 /**< node status flag */ +#define ORTE_PROC_STATE (orte_data_type_t) 27 /**< process/resource status */ +#define ORTE_JOB_STATE (orte_data_type_t) 28 /**< job status flag */ +#define ORTE_EXIT_CODE (orte_data_type_t) 29 /**< process exit code */ /* GPR types */ -#define ORTE_GPR_KEYVAL (orte_data_type_t) 33 /**< registry key-value pair */ -#define ORTE_GPR_NOTIFY_ACTION (orte_data_type_t) 34 /**< registry notify action */ -#define ORTE_GPR_TRIGGER_ACTION (orte_data_type_t) 35 /**< registry trigger action */ -#define ORTE_GPR_CMD (orte_data_type_t) 36 /**< registry command */ -#define ORTE_GPR_SUBSCRIPTION_ID (orte_data_type_t) 37 /**< registry notify id tag */ -#define ORTE_GPR_TRIGGER_ID (orte_data_type_t) 38 /**< registry notify id tag */ -#define ORTE_GPR_VALUE (orte_data_type_t) 39 /**< registry return value */ -#define ORTE_GPR_ADDR_MODE (orte_data_type_t) 40 /**< Addressing mode for registry cmds */ -#define ORTE_GPR_SUBSCRIPTION (orte_data_type_t) 41 /**< describes data returned by subscription */ -#define ORTE_GPR_TRIGGER (orte_data_type_t) 42 /**< describes trigger conditions */ -#define ORTE_GPR_NOTIFY_DATA (orte_data_type_t) 43 /**< data returned from a subscription */ -#define ORTE_GPR_NOTIFY_MSG (orte_data_type_t) 44 /**< notify message containing notify_data objects */ -#define ORTE_GPR_NOTIFY_MSG_TYPE (orte_data_type_t) 45 /**< notify message type (subscription or trigger) */ -#define ORTE_GPR_SEARCH (orte_data_type_t) 46 /**< search criteria */ -#define ORTE_GPR_UPDATE (orte_data_type_t) 47 /**< update data on the registry */ +#define ORTE_GPR_KEYVAL (orte_data_type_t) 30 /**< registry key-value pair */ +#define ORTE_GPR_NOTIFY_ACTION (orte_data_type_t) 31 /**< registry notify action */ +#define ORTE_GPR_TRIGGER_ACTION (orte_data_type_t) 32 /**< registry trigger action */ +#define ORTE_GPR_CMD (orte_data_type_t) 33 /**< registry command */ +#define ORTE_GPR_SUBSCRIPTION_ID (orte_data_type_t) 34 /**< registry notify id tag */ +#define 
ORTE_GPR_TRIGGER_ID (orte_data_type_t) 35 /**< registry notify id tag */ +#define ORTE_GPR_VALUE (orte_data_type_t) 36 /**< registry return value */ +#define ORTE_GPR_ADDR_MODE (orte_data_type_t) 37 /**< Addressing mode for registry cmds */ +#define ORTE_GPR_SUBSCRIPTION (orte_data_type_t) 38 /**< describes data returned by subscription */ +#define ORTE_GPR_TRIGGER (orte_data_type_t) 39 /**< describes trigger conditions */ +#define ORTE_GPR_NOTIFY_DATA (orte_data_type_t) 40 /**< data returned from a subscription */ +#define ORTE_GPR_NOTIFY_MSG (orte_data_type_t) 41 /**< notify message containing notify_data objects */ +#define ORTE_GPR_NOTIFY_MSG_TYPE (orte_data_type_t) 42 /**< notify message type (subscription or trigger) */ +#define ORTE_GPR_SEARCH (orte_data_type_t) 43 /**< search criteria */ +#define ORTE_GPR_UPDATE (orte_data_type_t) 44 /**< update data on the registry */ /* Resource Manager types */ -#define ORTE_APP_CONTEXT (orte_data_type_t) 48 /**< argv and enviro arrays */ -#define ORTE_APP_CONTEXT_MAP (orte_data_type_t) 49 /**< application context mapping array */ -#define ORTE_NODE_DESC (orte_data_type_t) 50 /**< describes capabilities of nodes */ -#define ORTE_CELL_DESC (orte_data_type_t) 51 /**< describe attributes of cells */ -#define ORTE_SLOT_DESC (orte_data_type_t) 52 /**< describes slot allocations/reservations */ -#define ORTE_RAS_NODE (orte_data_type_t) 53 /**< node information */ -#define ORTE_JOB_MAP (orte_data_type_t) 54 /**< map of process locations */ -#define ORTE_MAPPED_PROC (orte_data_type_t) 55 /**< process entry on map */ -#define ORTE_MAPPED_NODE (orte_data_type_t) 56 /**< node entry on map */ -#define ORTE_ATTRIBUTE (orte_data_type_t) 57 /**< attribute used to control framework behavior */ -#define ORTE_ATTR_LIST (orte_data_type_t) 58 /**< list of attributes */ +#define ORTE_APP_CONTEXT (orte_data_type_t) 45 /**< argv and enviro arrays */ +#define ORTE_APP_CONTEXT_MAP (orte_data_type_t) 46 /**< application context mapping array */ 
+#define ORTE_NODE_DESC (orte_data_type_t) 47 /**< describes capabilities of nodes */ +#define ORTE_SLOT_DESC (orte_data_type_t) 48 /**< describes slot allocations/reservations */ +#define ORTE_RAS_NODE (orte_data_type_t) 49 /**< node information */ +#define ORTE_JOB_MAP (orte_data_type_t) 50 /**< map of process locations */ +#define ORTE_MAPPED_PROC (orte_data_type_t) 51 /**< process entry on map */ +#define ORTE_MAPPED_NODE (orte_data_type_t) 52 /**< node entry on map */ +#define ORTE_ATTRIBUTE (orte_data_type_t) 53 /**< attribute used to control framework behavior */ +#define ORTE_ATTR_LIST (orte_data_type_t) 54 /**< list of attributes */ /* RML types */ -#define ORTE_RML_TAG (orte_data_type_t) 59 /**< tag for sending/receiving messages */ +#define ORTE_RML_TAG (orte_data_type_t) 55 /**< tag for sending/receiving messages */ /* DAEMON communication type */ -#define ORTE_DAEMON_CMD (orte_data_type_t) 60 /**< command flag for communicating with the daemon */ +#define ORTE_DAEMON_CMD (orte_data_type_t) 56 /**< command flag for communicating with the daemon */ /* Need a command separate from ORTE_DAEMON_CMD, so that we can receive on * them both at the same time */ diff --git a/orte/mca/errmgr/base/errmgr_base_fns.c b/orte/mca/errmgr/base/errmgr_base_fns.c index 2ead52ed10..3c73ccd88f 100644 --- a/orte/mca/errmgr/base/errmgr_base_fns.c +++ b/orte/mca/errmgr/base/errmgr_base_fns.c @@ -42,14 +42,9 @@ void orte_errmgr_base_log(int error_code, char *filename, int line) return; } - if (NULL == orte_process_info.my_name) { - opal_output(0, "[NO-NAME] ORTE_ERROR_LOG: %s in file %s at line %d", - ORTE_ERROR_NAME(error_code), filename, line); - } else { - opal_output(0, "[%lu,%lu,%lu] ORTE_ERROR_LOG: %s in file %s at line %d", - ORTE_NAME_ARGS(orte_process_info.my_name), - ORTE_ERROR_NAME(error_code), filename, line); - } + opal_output(0, "%s ORTE_ERROR_LOG: %s in file %s at line %d", + ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_ERROR_NAME(error_code), filename, 
line); } int orte_errmgr_base_proc_aborted_not_avail(orte_gpr_notify_message_t *msg) diff --git a/orte/mca/errmgr/bproc/errmgr_bproc_component.c b/orte/mca/errmgr/bproc/errmgr_bproc_component.c index a95da889d4..712c1481be 100644 --- a/orte/mca/errmgr/bproc/errmgr_bproc_component.c +++ b/orte/mca/errmgr/bproc/errmgr_bproc_component.c @@ -154,8 +154,8 @@ orte_errmgr_bproc_component_init(bool *allow_multi_user_threads, bool *have_hidd int orte_errmgr_bproc_finalize(void) { if (orte_errmgr_bproc_globals.debug) { - opal_output(0, "[%lu,%lu,%lu] errmgr_bproc_finalize called", - ORTE_NAME_ARGS(orte_process_info.my_name)); + opal_output(0, "%s errmgr_bproc_finalize called", + ORTE_NAME_PRINT(orte_process_info.my_name)); } initialized = false; diff --git a/orte/mca/errmgr/hnp/errmgr_hnp_component.c b/orte/mca/errmgr/hnp/errmgr_hnp_component.c index 6c8a10d818..fdfcfa4ab5 100644 --- a/orte/mca/errmgr/hnp/errmgr_hnp_component.c +++ b/orte/mca/errmgr/hnp/errmgr_hnp_component.c @@ -159,8 +159,8 @@ int orte_errmgr_hnp_finalize(void) int rc; if (orte_errmgr_hnp_globals.debug) { - opal_output(0, "[%lu,%lu,%lu] errmgr_hnp_finalize called", - ORTE_NAME_ARGS(orte_process_info.my_name)); + opal_output(0, "%s errmgr_hnp_finalize called", + ORTE_NAME_PRINT(orte_process_info.my_name)); } /* stop the receive function */ diff --git a/orte/mca/errmgr/orted/errmgr_orted_component.c b/orte/mca/errmgr/orted/errmgr_orted_component.c index 703e4c2927..689fe913e2 100644 --- a/orte/mca/errmgr/orted/errmgr_orted_component.c +++ b/orte/mca/errmgr/orted/errmgr_orted_component.c @@ -154,8 +154,8 @@ orte_errmgr_orted_component_init(bool *allow_multi_user_threads, bool *have_hidd int orte_errmgr_orted_finalize(void) { if (orte_errmgr_orted_globals.debug) { - opal_output(0, "[%lu,%lu,%lu] errmgr_orted_finalize called", - ORTE_NAME_ARGS(orte_process_info.my_name)); + opal_output(0, "%s errmgr_orted_finalize called", + ORTE_NAME_PRINT(orte_process_info.my_name)); } initialized = false; diff --git 
a/orte/mca/errmgr/proxy/errmgr_proxy_component.c b/orte/mca/errmgr/proxy/errmgr_proxy_component.c index bdfd2567c2..efb193433d 100644 --- a/orte/mca/errmgr/proxy/errmgr_proxy_component.c +++ b/orte/mca/errmgr/proxy/errmgr_proxy_component.c @@ -153,8 +153,8 @@ orte_errmgr_proxy_component_init(bool *allow_multi_user_threads, bool *have_hidd int orte_errmgr_proxy_finalize(void) { if (orte_errmgr_proxy_globals.debug) { - opal_output(0, "[%lu,%lu,%lu] errmgr_proxy_finalize called", - ORTE_NAME_ARGS(orte_process_info.my_name)); + opal_output(0, "%s errmgr_proxy_finalize called", + ORTE_NAME_PRINT(orte_process_info.my_name)); } initialized = false; diff --git a/orte/mca/filem/base/filem_base_fns.c b/orte/mca/filem/base/filem_base_fns.c index 6fac7d6409..0099c414ca 100644 --- a/orte/mca/filem/base/filem_base_fns.c +++ b/orte/mca/filem/base/filem_base_fns.c @@ -155,8 +155,7 @@ int orte_filem_base_get_proc_node_name(orte_process_name_t *proc, char **machine * Contact GPR and get the 'orte-node-name' for this process */ /* if it is the root then we need a different key :/ */ - if(proc->cellid == 0 && - proc->jobid == 0 && + if(proc->jobid == 0 && proc->vpid == 0) { keys[0] = ORTE_PROC_RML_IP_ADDRESS_KEY; } @@ -361,9 +360,9 @@ void orte_filem_base_query_callback(int status, } opal_output_verbose(10, orte_filem_base_output, - "filem:base: filem_base_query_callback: [%lu,%lu,%lu] -> [%lu,%lu,%lu]: Filename Requested (%s) translated to (%s)", - ORTE_NAME_ARGS(orte_process_info.my_name), - ORTE_NAME_ARGS(peer), + "filem:base: filem_base_query_callback: %s -> %s: Filename Requested (%s) translated to (%s)", + ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(peer), filename, tmp_name); /* diff --git a/orte/mca/filem/rsh/filem_rsh_module.c b/orte/mca/filem/rsh/filem_rsh_module.c index c23e36dacb..2cbc36032c 100644 --- a/orte/mca/filem/rsh/filem_rsh_module.c +++ b/orte/mca/filem/rsh/filem_rsh_module.c @@ -426,9 +426,9 @@ static void orte_filem_rsh_query_callback(int 
status, void* cbdata) { opal_output_verbose(10, mca_filem_rsh_component.super.output_handle, - "filem:rsh: query_callback([%lu,%lu,%lu] -> [%lu,%lu,%lu])", - ORTE_NAME_ARGS(orte_process_info.my_name), - ORTE_NAME_ARGS(peer)); + "filem:rsh: query_callback(%s -> %s)", + ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(peer)); /* Call the base callback function */ orte_filem_base_query_callback(status, peer, buffer, tag, cbdata); diff --git a/orte/mca/gpr/proxy/gpr_proxy_component.c b/orte/mca/gpr/proxy/gpr_proxy_component.c index 5f1a700a59..090e27c97f 100644 --- a/orte/mca/gpr/proxy/gpr_proxy_component.c +++ b/orte/mca/gpr/proxy/gpr_proxy_component.c @@ -230,8 +230,8 @@ orte_gpr_proxy_component_init(bool *allow_multi_user_threads, bool *have_hidden_ if (NULL != orte_process_info.gpr_replica_uri) { if (orte_gpr_proxy_globals.debug) { - opal_output(0, "[%lu,%lu,%lu] gpr_proxy_init: proxy selected", - ORTE_NAME_ARGS(orte_process_info.my_name)); + opal_output(0, "%s gpr_proxy_init: proxy selected", + ORTE_NAME_PRINT(orte_process_info.my_name)); } /* setup the replica location */ @@ -326,8 +326,8 @@ int orte_gpr_proxy_finalize(void) orte_gpr_proxy_trigger_t **ltrigs; if (orte_gpr_proxy_globals.debug) { - opal_output(0, "[%lu,%lu,%lu] gpr_proxy_finalize called", - ORTE_NAME_ARGS(orte_process_info.my_name)); + opal_output(0, "%s gpr_proxy_finalize called", + ORTE_NAME_PRINT(orte_process_info.my_name)); } if (initialized) { diff --git a/orte/mca/gpr/proxy/gpr_proxy_compound_cmd.c b/orte/mca/gpr/proxy/gpr_proxy_compound_cmd.c index 934045c19b..f0877db684 100644 --- a/orte/mca/gpr/proxy/gpr_proxy_compound_cmd.c +++ b/orte/mca/gpr/proxy/gpr_proxy_compound_cmd.c @@ -95,7 +95,7 @@ int orte_gpr_proxy_exec_compound_cmd(orte_buffer_t *buffer) int rc, response; if (orte_gpr_proxy_globals.debug) { - opal_output(0, "[%lu,%lu,%lu] transmitting compound command", + opal_output(0, "[%ld,%ld] transmitting compound command", ORTE_NAME_ARGS(orte_process_info.my_name)); } diff 
--git a/orte/mca/gpr/proxy/gpr_proxy_dump_local_trigs_subs.c b/orte/mca/gpr/proxy/gpr_proxy_dump_local_trigs_subs.c index eb316b5095..3bc6412051 100644 --- a/orte/mca/gpr/proxy/gpr_proxy_dump_local_trigs_subs.c +++ b/orte/mca/gpr/proxy/gpr_proxy_dump_local_trigs_subs.c @@ -47,7 +47,7 @@ int orte_gpr_proxy_dump_local_triggers(void) orte_gpr_proxy_trigger_t **trigs; orte_std_cntr_t j, k; - opal_output(orte_gpr_base_output, "DUMP OF LOCAL TRIGGERS for [%lu,%lu,%lu]\n", + opal_output(orte_gpr_base_output, "DUMP OF LOCAL TRIGGERS for [%ld,%ld]\n", ORTE_NAME_ARGS(orte_process_info.my_name)); opal_output(orte_gpr_base_output, "Number of triggers: %lu\n", (unsigned long) orte_gpr_proxy_globals.num_trigs); @@ -72,7 +72,7 @@ int orte_gpr_proxy_dump_local_subscriptions(void) orte_gpr_proxy_subscriber_t **subs; orte_std_cntr_t j, k; - opal_output(orte_gpr_base_output, "DUMP OF LOCAL SUBSCRIPTIONS for [%lu,%lu,%lu]\n", + opal_output(orte_gpr_base_output, "DUMP OF LOCAL SUBSCRIPTIONS for [%ld,%ld]\n", ORTE_NAME_ARGS(orte_process_info.my_name)); opal_output(orte_gpr_base_output, "Number of subscriptions: %lu\n", (unsigned long) orte_gpr_proxy_globals.num_subs); diff --git a/orte/mca/gpr/replica/api_layer/gpr_replica_dump_api.c b/orte/mca/gpr/replica/api_layer/gpr_replica_dump_api.c index 1086a5fab3..af2c71cfe5 100644 --- a/orte/mca/gpr/replica/api_layer/gpr_replica_dump_api.c +++ b/orte/mca/gpr/replica/api_layer/gpr_replica_dump_api.c @@ -42,8 +42,8 @@ int orte_gpr_replica_dump_all(void) int rc; if (orte_gpr_replica_globals.debug) { - opal_output(0, "[%lu,%lu,%lu] gpr_replica_dump_all: entered", - ORTE_NAME_ARGS(orte_process_info.my_name)); + opal_output(0, "%s gpr_replica_dump_all: entered", + ORTE_NAME_PRINT(orte_process_info.my_name)); } OPAL_THREAD_LOCK(&orte_gpr_replica_globals.mutex); @@ -74,8 +74,8 @@ int orte_gpr_replica_dump_segments(char *segment) int rc; if (orte_gpr_replica_globals.debug) { - opal_output(0, "[%lu,%lu,%lu] gpr_replica_dump_segments: entered", - 
ORTE_NAME_ARGS(orte_process_info.my_name)); + opal_output(0, "%s gpr_replica_dump_segments: entered", + ORTE_NAME_PRINT(orte_process_info.my_name)); } OPAL_THREAD_LOCK(&orte_gpr_replica_globals.mutex); @@ -106,8 +106,8 @@ int orte_gpr_replica_dump_triggers(orte_gpr_trigger_id_t start) int rc; if (orte_gpr_replica_globals.debug) { - opal_output(0, "[%lu,%lu,%lu] gpr_replica_dump_triggers: entered", - ORTE_NAME_ARGS(orte_process_info.my_name)); + opal_output(0, "%s gpr_replica_dump_triggers: entered", + ORTE_NAME_PRINT(orte_process_info.my_name)); } OPAL_THREAD_LOCK(&orte_gpr_replica_globals.mutex); @@ -290,8 +290,8 @@ int orte_gpr_replica_dump_callbacks(void) int rc; if (orte_gpr_replica_globals.debug) { - opal_output(0, "[%lu,%lu,%lu] gpr_replica_dump_callbacks: entered", - ORTE_NAME_ARGS(orte_process_info.my_name)); + opal_output(0, "%s gpr_replica_dump_callbacks: entered", + ORTE_NAME_PRINT(orte_process_info.my_name)); } OPAL_THREAD_LOCK(&orte_gpr_replica_globals.mutex); diff --git a/orte/mca/gpr/replica/api_layer/gpr_replica_dump_local_trigs_subs_api.c b/orte/mca/gpr/replica/api_layer/gpr_replica_dump_local_trigs_subs_api.c index ea4a1d90e4..3b941787da 100644 --- a/orte/mca/gpr/replica/api_layer/gpr_replica_dump_local_trigs_subs_api.c +++ b/orte/mca/gpr/replica/api_layer/gpr_replica_dump_local_trigs_subs_api.c @@ -40,8 +40,8 @@ int orte_gpr_replica_dump_local_triggers(void) orte_gpr_replica_local_trigger_t **trigs; orte_std_cntr_t j, k; - opal_output(orte_gpr_base_output, "DUMP OF LOCAL TRIGGERS for [%lu,%lu,%lu]\n", - ORTE_NAME_ARGS(orte_process_info.my_name)); + opal_output(orte_gpr_base_output, "DUMP OF LOCAL TRIGGERS for %s\n", + ORTE_NAME_PRINT(orte_process_info.my_name)); opal_output(orte_gpr_base_output, "Number of triggers: %lu\n", (unsigned long) orte_gpr_replica_globals.num_local_trigs); trigs = (orte_gpr_replica_local_trigger_t**)(orte_gpr_replica_globals.local_triggers)->addr; @@ -70,8 +70,8 @@ int orte_gpr_replica_dump_local_subscriptions(void) 
orte_gpr_replica_local_subscriber_t **subs; orte_std_cntr_t j, k; - opal_output(orte_gpr_base_output, "DUMP OF LOCAL SUBSCRIPTIONS for [%lu,%lu,%lu]\n", - ORTE_NAME_ARGS(orte_process_info.my_name)); + opal_output(orte_gpr_base_output, "DUMP OF LOCAL SUBSCRIPTIONS for %s\n", + ORTE_NAME_PRINT(orte_process_info.my_name)); opal_output(orte_gpr_base_output, "Number of subscriptions: %lu\n", (unsigned long) orte_gpr_replica_globals.num_local_subs); subs = (orte_gpr_replica_local_subscriber_t**)(orte_gpr_replica_globals.local_subscriptions)->addr; diff --git a/orte/mca/gpr/replica/communications/gpr_replica_recv_proxy_msgs.c b/orte/mca/gpr/replica/communications/gpr_replica_recv_proxy_msgs.c index 43f91d6678..cfd9c89b70 100644 --- a/orte/mca/gpr/replica/communications/gpr_replica_recv_proxy_msgs.c +++ b/orte/mca/gpr/replica/communications/gpr_replica_recv_proxy_msgs.c @@ -64,8 +64,8 @@ void orte_gpr_replica_recv(int status, orte_process_name_t* sender, OPAL_TRACE(3); if (orte_gpr_replica_globals.debug) { - opal_output(0, "[%lu,%lu,%lu] gpr replica: received message from [%lu,%lu,%lu]", - ORTE_NAME_ARGS(orte_process_info.my_name), ORTE_NAME_ARGS(sender)); + opal_output(0, "%s gpr replica: received message from %s", + ORTE_NAME_PRINT(orte_process_info.my_name), ORTE_NAME_PRINT(sender)); } OPAL_THREAD_LOCK(&orte_gpr_replica_globals.mutex); diff --git a/orte/mca/gpr/replica/communications/gpr_replica_remote_msg.c b/orte/mca/gpr/replica/communications/gpr_replica_remote_msg.c index 4d81c8e3d4..6931e25908 100644 --- a/orte/mca/gpr/replica/communications/gpr_replica_remote_msg.c +++ b/orte/mca/gpr/replica/communications/gpr_replica_remote_msg.c @@ -96,7 +96,7 @@ int orte_gpr_replica_remote_notify(orte_process_name_t *recipient, * process is done */ ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - opal_output(0, "send failed to [%ld,%ld,%ld]", ORTE_NAME_ARGS(recipient)); + opal_output(0, "send failed to %s", ORTE_NAME_PRINT(recipient)); orte_dss.dump(0, message, ORTE_GPR_NOTIFY_MSG); 
OPAL_THREAD_LOCK(&orte_gpr_replica_globals.mutex); return ORTE_ERR_COMM_FAILURE; diff --git a/orte/mca/gpr/replica/functional_layer/gpr_replica_cleanup_fn.c b/orte/mca/gpr/replica/functional_layer/gpr_replica_cleanup_fn.c index 1098c3f043..dab95ec1bd 100644 --- a/orte/mca/gpr/replica/functional_layer/gpr_replica_cleanup_fn.c +++ b/orte/mca/gpr/replica/functional_layer/gpr_replica_cleanup_fn.c @@ -79,8 +79,8 @@ int orte_gpr_replica_cleanup_proc_fn(orte_process_name_t *proc) OPAL_TRACE(2); if (orte_gpr_replica_globals.debug) { - opal_output(0, "[%lu,%lu,%lu] gpr_replica_cleanup_proc: function entered for process [%lu,%lu,%lu]", - ORTE_NAME_ARGS(orte_process_info.my_name), ORTE_NAME_ARGS(proc)); + opal_output(0, "%s gpr_replica_cleanup_proc: function entered for process %s", + ORTE_NAME_PRINT(orte_process_info.my_name), ORTE_NAME_PRINT(proc)); } if (ORTE_SUCCESS != (rc = orte_ns.get_proc_name_string(&procname, proc))) { diff --git a/orte/mca/gpr/replica/functional_layer/gpr_replica_dump_fn.c b/orte/mca/gpr/replica/functional_layer/gpr_replica_dump_fn.c index 9ebb0fccef..992ff2dab6 100644 --- a/orte/mca/gpr/replica/functional_layer/gpr_replica_dump_fn.c +++ b/orte/mca/gpr/replica/functional_layer/gpr_replica_dump_fn.c @@ -236,8 +236,8 @@ int orte_gpr_replica_dump_callbacks_fn(orte_buffer_t *buffer) if (NULL == cb->requestor) { sprintf(tmp_out, "Local requestor"); } else { - sprintf(tmp_out, "Requestor: [%lu,%lu,%lu]", - ORTE_NAME_ARGS(cb->requestor)); + sprintf(tmp_out, "Requestor: %s", + ORTE_NAME_PRINT(cb->requestor)); } orte_gpr_replica_dump_load_string(buffer, &tmp_out); orte_gpr_base_dump_notify_msg(buffer, cb->message); @@ -420,8 +420,8 @@ int orte_gpr_replica_dump_trigger(orte_buffer_t *buffer, sprintf(tmp_out, "\t\tRequestor %lu: LOCAL@idtag %lu", (unsigned long)j, (unsigned long)attached[i]->idtag); } else { - sprintf(tmp_out, "\t\tRequestor %lu: [%lu,%lu,%lu]@idtag %lu", - (unsigned long)j, ORTE_NAME_ARGS(attached[i]->requestor), + sprintf(tmp_out, 
"\t\tRequestor %lu: %s@idtag %lu", + (unsigned long)j, ORTE_NAME_PRINT(attached[i]->requestor), (unsigned long)attached[i]->idtag); } orte_gpr_replica_dump_load_string(buffer, &tmp_out); @@ -435,8 +435,8 @@ int orte_gpr_replica_dump_trigger(orte_buffer_t *buffer, sprintf(tmp_out, "\tTRIGGER MASTER: LOCAL@idtag %lu", (unsigned long)trig->master->idtag); } else { - sprintf(tmp_out, "\tTRIGGER MASTER: [%lu,%lu,%lu]@idtag %lu", - ORTE_NAME_ARGS(trig->master->requestor), + sprintf(tmp_out, "\tTRIGGER MASTER: %s@idtag %lu", + ORTE_NAME_PRINT(trig->master->requestor), (unsigned long)trig->master->idtag); } } @@ -612,8 +612,8 @@ int orte_gpr_replica_dump_subscription(orte_buffer_t *buffer, sprintf(tmp_out, "\t\tRequestor: LOCAL @ subscription id %lu", (unsigned long) reqs[j]->idtag); } else { - sprintf(tmp_out, "\t\tRequestor: [%lu,%lu,%lu] @ subscription id %lu", - ORTE_NAME_ARGS(reqs[j]->requestor), + sprintf(tmp_out, "\t\tRequestor: %s @ subscription id %lu", + ORTE_NAME_PRINT(reqs[j]->requestor), (unsigned long) reqs[j]->idtag); } orte_gpr_replica_dump_load_string(buffer, &tmp_out); diff --git a/orte/mca/gpr/replica/functional_layer/gpr_replica_put_get_fn.c b/orte/mca/gpr/replica/functional_layer/gpr_replica_put_get_fn.c index 3082619e72..abe617b80f 100644 --- a/orte/mca/gpr/replica/functional_layer/gpr_replica_put_get_fn.c +++ b/orte/mca/gpr/replica/functional_layer/gpr_replica_put_get_fn.c @@ -130,8 +130,8 @@ int orte_gpr_replica_put_fn(orte_gpr_addr_mode_t addr_mode, if (orte_gpr_replica_globals.debug) { char *tmp; - opal_output(0, "[%lu,%lu,%lu] gpr_replica_put: entered on segment %s\nValues:", - ORTE_NAME_ARGS(orte_process_info.my_name), seg->name); + opal_output(0, "%s gpr_replica_put: entered on segment %s\nValues:", + ORTE_NAME_PRINT(orte_process_info.my_name), seg->name); for (i=0; i < cnt; i++) { opal_output(0, "\tKey: %s", keyvals[i]->key); } @@ -282,7 +282,7 @@ int orte_gpr_replica_put_fn(orte_gpr_addr_mode_t addr_mode, } if (orte_gpr_replica_globals.debug) 
{ - opal_output(0, "[%lu,%lu,%lu] gpr_replica_put: complete", ORTE_NAME_ARGS(orte_process_info.my_name)); + opal_output(0, "%s gpr_replica_put: complete", ORTE_NAME_PRINT(orte_process_info.my_name)); } return ORTE_SUCCESS; @@ -322,8 +322,8 @@ int orte_gpr_replica_get_fn(orte_gpr_addr_mode_t addr_mode, if (orte_gpr_replica_globals.debug) { char *token; - opal_output(0, "[%lu,%lu,%lu] gpr_replica_get: entered", - ORTE_NAME_ARGS(orte_process_info.my_name)); + opal_output(0, "%s gpr_replica_get: entered", + ORTE_NAME_PRINT(orte_process_info.my_name)); opal_output(0, "\tGetting data from segment %s with %d tokens and %d keys", seg->name, num_tokens, num_keys); for (i=0; i < num_tokens; i++) { @@ -520,8 +520,8 @@ CLEANUP: OBJ_DESTRUCT(&get_list); if (orte_gpr_replica_globals.debug) { - opal_output(0, "[%lu,%lu,%lu] gpr_replica_get: finished search", - ORTE_NAME_ARGS(orte_process_info.my_name)); + opal_output(0, "%s gpr_replica_get: finished search", + ORTE_NAME_PRINT(orte_process_info.my_name)); } return rc; @@ -721,8 +721,8 @@ CLEANUP: OBJ_DESTRUCT(&get_list); if (orte_gpr_replica_globals.debug) { - opal_output(0, "[%lu,%lu,%lu] gpr_replica_get: finished search", - ORTE_NAME_ARGS(orte_process_info.my_name)); + opal_output(0, "%s gpr_replica_get: finished search", + ORTE_NAME_PRINT(orte_process_info.my_name)); } return rc; diff --git a/orte/mca/gpr/replica/functional_layer/gpr_replica_subscribe_fn.c b/orte/mca/gpr/replica/functional_layer/gpr_replica_subscribe_fn.c index b7dd17a3f4..39f1487ac1 100644 --- a/orte/mca/gpr/replica/functional_layer/gpr_replica_subscribe_fn.c +++ b/orte/mca/gpr/replica/functional_layer/gpr_replica_subscribe_fn.c @@ -52,8 +52,8 @@ int orte_gpr_replica_subscribe_fn(orte_process_name_t *requestor, OPAL_TRACE(2); if (orte_gpr_replica_globals.debug) { - opal_output(0, "[%lu,%lu,%lu] gpr_replica_subscribe: entered with num_trigs:%d", - ORTE_NAME_ARGS(orte_process_info.my_name), num_trigs); + opal_output(0, "%s gpr_replica_subscribe: entered with 
num_trigs:%d", + ORTE_NAME_PRINT(orte_process_info.my_name), num_trigs); } /* ensure one of the search arrays is clear - in this case, we diff --git a/orte/mca/grpcomm/basic/grpcomm_basic_module.c b/orte/mca/grpcomm/basic/grpcomm_basic_module.c index 7b8b7fe0e3..a7940089e8 100644 --- a/orte/mca/grpcomm/basic/grpcomm_basic_module.c +++ b/orte/mca/grpcomm/basic/grpcomm_basic_module.c @@ -166,7 +166,7 @@ static int xcast_nb(orte_jobid_t job, DONE: if (orte_timing) { gettimeofday(&stop, NULL); - opal_output(0, "xcast_nb [%ld,%ld,%ld]: time %ld usec", ORTE_NAME_ARGS(ORTE_PROC_MY_NAME), + opal_output(0, "xcast_nb %s: time %ld usec", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (long int)((stop.tv_sec - start.tv_sec)*1000000 + (stop.tv_usec - start.tv_usec))); } @@ -248,7 +248,7 @@ DONE: if (orte_timing) { gettimeofday(&stop, NULL); - opal_output(0, "xcast [%ld,%ld,%ld]: time %ld usec", ORTE_NAME_ARGS(ORTE_PROC_MY_NAME), + opal_output(0, "xcast %s: time %ld usec", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (long int)((stop.tv_sec - start.tv_sec)*1000000 + (stop.tv_usec - start.tv_usec))); } @@ -335,12 +335,11 @@ static int xcast_binomial_tree(orte_jobid_t job, } if (orte_timing) { - opal_output(0, "xcast [%ld,%ld,%ld]: mode binomial buffer size %ld", - ORTE_NAME_ARGS(ORTE_PROC_MY_NAME), (long)buf->bytes_used); + opal_output(0, "xcast %s: mode binomial buffer size %ld", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (long)buf->bytes_used); } /* start setting up the target recipients */ - target.cellid = ORTE_PROC_MY_NAME->cellid; target.jobid = 0; /* compute the bitmap */ @@ -374,13 +373,12 @@ static int xcast_binomial_tree(orte_jobid_t job, } OPAL_THREAD_UNLOCK(&orte_grpcomm_basic.mutex); - target.cellid = ORTE_PROC_MY_NAME->cellid; target.jobid = 0; for (i = hibit + 1, mask = 1 << i; i <= bitmap; ++i, mask <<= 1) { peer = rank | mask; if (peer < size) { target.vpid = (orte_vpid_t)peer; - opal_output(orte_grpcomm_basic.output, "[%ld,%ld,%ld] xcast to [%ld,%ld,%ld]", 
ORTE_NAME_ARGS(ORTE_PROC_MY_NAME), ORTE_NAME_ARGS(&target)); + opal_output(orte_grpcomm_basic.output, "%s xcast to %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&target)); if (0 > (rc = orte_rml.send_buffer_nb(&target, buf, ORTE_RML_TAG_ORTED_ROUTED, 0, xcast_send_cb, NULL))) { if (ORTE_ERR_ADDRESSEE_UNKNOWN != rc) { @@ -466,8 +464,8 @@ static int xcast_linear(orte_jobid_t job, } if (orte_timing) { - opal_output(0, "xcast [%ld,%ld,%ld]: mode linear buffer size %ld", - ORTE_NAME_ARGS(ORTE_PROC_MY_NAME), (long)buf->bytes_used); + opal_output(0, "xcast %s: mode linear buffer size %ld", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (long)buf->bytes_used); } /* get the number of daemons out there */ @@ -497,7 +495,6 @@ static int xcast_linear(orte_jobid_t job, OPAL_THREAD_UNLOCK(&orte_grpcomm_basic.mutex); /* send the message to each daemon as fast as we can */ - dummy.cellid = ORTE_PROC_MY_NAME->cellid; dummy.jobid = 0; for (i=0; i < range; i++) { if (ORTE_PROC_MY_NAME->vpid != i) { /* don't send to myself */ @@ -553,8 +550,8 @@ static int xcast_direct(orte_jobid_t job, OBJ_DESTRUCT(&attrs); if (orte_timing) { - opal_output(0, "xcast [%ld,%ld,%ld]: mode direct buffer size %ld", - ORTE_NAME_ARGS(ORTE_PROC_MY_NAME), (long)buffer->bytes_used); + opal_output(0, "xcast %s: mode direct buffer size %ld", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (long)buffer->bytes_used); } /* we have to account for all of the messages we are about to send diff --git a/orte/mca/iof/README.txt b/orte/mca/iof/README.txt index 7ea39b0f8e..1fcd8292b7 100644 --- a/orte/mca/iof/README.txt +++ b/orte/mca/iof/README.txt @@ -74,7 +74,7 @@ in the job. Note that the jobid=1 and the mask=2. So, we expect this to collect the stdout from any of the ranks. Obviously the second subscriber says the same thing but for stderr. The third subscriber is for receving data from stdin and sending it out to rank 0 of -the job. Notice the mask=ff which means compare cellid,jobid,vpid +the job. 
Notice the mask=ff which means compare jobid,vpid when addressing where the data goes. The first endpoint is created by a call to pull by the rmgr. After @@ -90,7 +90,6 @@ tied to the subscription. Hmmm, this I do not really understand. APPENDIX A These are the defines that go with the mask. #define ORTE_NS_CMP_NONE 0x00 -#define ORTE_NS_CMP_CELLID 0x01 #define ORTE_NS_CMP_JOBID 0x02 #define ORTE_NS_CMP_VPID 0x04 #define ORTE_NS_CMP_ALL 0Xff diff --git a/orte/mca/iof/proxy/iof_proxy.c b/orte/mca/iof/proxy/iof_proxy.c index a588c5f168..abcc078da3 100644 --- a/orte/mca/iof/proxy/iof_proxy.c +++ b/orte/mca/iof/proxy/iof_proxy.c @@ -127,7 +127,7 @@ int orte_iof_proxy_unpublish( #if 0 { int i = 0; - opal_output(orte_iof_base.iof_output, "[%lu,%lu,%lu] orted: ******** ABOUT TO IOF PROXY UNPUBLISH, %d", ORTE_NAME_ARGS(orte_process_info.my_name), getpid()); + opal_output(orte_iof_base.iof_output, "%s orted: ******** ABOUT TO IOF PROXY UNPUBLISH, %d", ORTE_NAME_PRINT(orte_process_info.my_name), getpid()); fflush(stderr); while (0 == i) sleep(5); } diff --git a/orte/mca/iof/svc/iof_svc_component.c b/orte/mca/iof/svc/iof_svc_component.c index e1e0dc4e1b..2084092bfa 100644 --- a/orte/mca/iof/svc/iof_svc_component.c +++ b/orte/mca/iof/svc/iof_svc_component.c @@ -114,8 +114,8 @@ orte_iof_svc_exception_handler(const orte_process_name_t* peer, orte_rml_excepti { orte_iof_base_endpoint_t *endpoint; opal_output(orte_iof_base.iof_output, - "iof svc exception handler! [%lu,%lu,%lu]\n", - ORTE_NAME_ARGS(peer)); + "iof svc exception handler! %s\n", + ORTE_NAME_PRINT((orte_process_name_t*)peer)); /* If we detect an exception on the RML connection to a peer, delete all of its subscriptions and publications. 
Note that diff --git a/orte/mca/iof/svc/iof_svc_proxy.c b/orte/mca/iof/svc/iof_svc_proxy.c index 308fcd950b..eaaa8ef60c 100644 --- a/orte/mca/iof/svc/iof_svc_proxy.c +++ b/orte/mca/iof/svc/iof_svc_proxy.c @@ -174,10 +174,10 @@ static void orte_iof_svc_proxy_msg( /* if the subscription origin doesn't match the message's origin, skip this subscription */ if(orte_ns.compare_fields(sub->origin_mask,&sub->origin_name,&hdr->msg_origin) == 0) { - opal_output(orte_iof_base.iof_output, "sub origin [%lu,%lu,%lu], msg origin [%lu,%lu,%lu], msg proxy [%lu,%lu,%lu] orte_iof_svc_proxy_msg: tag %d sequence %d, len %d\n", - ORTE_NAME_ARGS(&sub->origin_name), - ORTE_NAME_ARGS(&hdr->msg_origin), - ORTE_NAME_ARGS(&hdr->msg_proxy), + opal_output(orte_iof_base.iof_output, "sub origin %s, msg origin %s, msg proxy %s orte_iof_svc_proxy_msg: tag %d sequence %d, len %d\n", + ORTE_NAME_PRINT(&sub->origin_name), + ORTE_NAME_PRINT(&hdr->msg_origin), + ORTE_NAME_PRINT(&hdr->msg_proxy), hdr->msg_tag, hdr->msg_seq, hdr->msg_len); /* Everthing matched -- forward the message */ OPAL_THREAD_UNLOCK(&mca_iof_svc_component.svc_lock); @@ -239,10 +239,10 @@ static void orte_iof_svc_proxy_pub( orte_iof_base_pub_header_t* hdr) { int rc; - opal_output(orte_iof_base.iof_output, "orte_iof_svc_proxy_pub: mask %d, tag %d, proc [%lu,%lu,%lu], proxy [%lu,%lu,%lu]", + opal_output(orte_iof_base.iof_output, "orte_iof_svc_proxy_pub: mask %d, tag %d, proc %s, proxy %s", hdr->pub_mask, hdr->pub_tag, - ORTE_NAME_ARGS(&hdr->pub_name), - ORTE_NAME_ARGS(&hdr->pub_proxy)); + ORTE_NAME_PRINT(&hdr->pub_name), + ORTE_NAME_PRINT(&hdr->pub_proxy)); rc = orte_iof_svc_pub_create( &hdr->pub_name, @@ -264,10 +264,10 @@ static void orte_iof_svc_proxy_unpub( orte_iof_base_pub_header_t* hdr) { int rc; - opal_output(orte_iof_base.iof_output, "orte_iof_svc_proxy_unpub: mask %d, tag %d, proc [%lu,%lu,%lu], proxy [%lu,%lu,%lu]", + opal_output(orte_iof_base.iof_output, "orte_iof_svc_proxy_unpub: mask %d, tag %d, proc %s, proxy %s", 
hdr->pub_mask, hdr->pub_tag, - ORTE_NAME_ARGS(&hdr->pub_name), - ORTE_NAME_ARGS(&hdr->pub_proxy)); + ORTE_NAME_PRINT(&hdr->pub_name), + ORTE_NAME_PRINT(&hdr->pub_proxy)); rc = orte_iof_svc_pub_delete( &hdr->pub_name, diff --git a/orte/mca/iof/svc/iof_svc_pub.c b/orte/mca/iof/svc/iof_svc_pub.c index b48fc92b96..62c6ac8020 100644 --- a/orte/mca/iof/svc/iof_svc_pub.c +++ b/orte/mca/iof/svc/iof_svc_pub.c @@ -53,8 +53,8 @@ int orte_iof_svc_pub_create( pub->pub_tag = pub_tag; pub->pub_endpoint = orte_iof_base_endpoint_match(pub_name,pub_mask,pub_tag); - opal_output(orte_iof_base.iof_output, "created svc pub, name [%lu,%lu,%lu], proxy [%lu,%lu,%lu], tag %d / mask %x, endpoint %p\n", - ORTE_NAME_ARGS(pub_name), ORTE_NAME_ARGS(pub_proxy), + opal_output(orte_iof_base.iof_output, "created svc pub, name %s, proxy %s, tag %d / mask %x, endpoint %p\n", + ORTE_NAME_PRINT((orte_process_name_t*)pub_name), ORTE_NAME_PRINT((orte_process_name_t*)pub_proxy), pub_tag, pub_mask, (char*) pub->pub_endpoint); /* append this published endpoint to any matching subscription */ diff --git a/orte/mca/iof/svc/iof_svc_sub.c b/orte/mca/iof/svc/iof_svc_sub.c index 982537da44..498a754dd7 100644 --- a/orte/mca/iof/svc/iof_svc_sub.c +++ b/orte/mca/iof/svc/iof_svc_sub.c @@ -104,9 +104,9 @@ int orte_iof_svc_sub_create( sub->target_mask = target_mask; sub->target_tag = target_tag; sub->sub_endpoint = orte_iof_base_endpoint_match(&sub->target_name, sub->target_mask, sub->target_tag); - opal_output(orte_iof_base.iof_output, "created svc sub, origin [%lu,%lu,%lu] tag %d / mask %x, target [%lu,%lu,%lu], tag %d / mask %x\n", - ORTE_NAME_ARGS(origin_name), origin_tag, origin_mask, - ORTE_NAME_ARGS(target_name), target_tag, target_mask); + opal_output(orte_iof_base.iof_output, "created svc sub, origin %s tag %d / mask %x, target %s, tag %d / mask %x\n", + ORTE_NAME_PRINT((orte_process_name_t*)origin_name), origin_tag, origin_mask, + ORTE_NAME_PRINT((orte_process_name_t*)target_name), target_tag, target_mask); /* 
search through published endpoints for a match */ for(item = opal_list_get_first(&mca_iof_svc_component.svc_published); @@ -191,9 +191,9 @@ void orte_iof_svc_sub_ack( orte_iof_svc_sub_t* sub = (orte_iof_svc_sub_t*)s_item; opal_list_item_t *f_item; - opal_output(orte_iof_base.iof_output, "ack: checking sub origin [%lu,%lu,%lu] tag %d / mask %x, target [%lu,%lu,%lu], tag %d / mask %x\n", - ORTE_NAME_ARGS(&sub->origin_name), sub->origin_tag, sub->origin_mask, - ORTE_NAME_ARGS(&sub->target_name), sub->target_tag, sub->target_mask); + opal_output(orte_iof_base.iof_output, "ack: checking sub origin %s tag %d / mask %x, target %s, tag %d / mask %x\n", + ORTE_NAME_PRINT(&sub->origin_name), sub->origin_tag, sub->origin_mask, + ORTE_NAME_PRINT(&sub->target_name), sub->target_tag, sub->target_mask); /* If the subscription origin/tag doesn't match the ACK origin/tag, skip it */ @@ -223,8 +223,8 @@ void orte_iof_svc_sub_ack( orte_iof_svc_pub_t* pub = fwd->fwd_pub; bool value_set = true; - opal_output(orte_iof_base.iof_output, "ack: checking fwd [%lu,%lu,%lu] tag %d / mask %x\n", - ORTE_NAME_ARGS(&pub->pub_name), pub->pub_tag, pub->pub_mask); + opal_output(orte_iof_base.iof_output, "ack: checking fwd %s tag %d / mask %x\n", + ORTE_NAME_PRINT(&pub->pub_name), pub->pub_tag, pub->pub_mask); /* If the publication origin or publication proxy matches the ACK'ing proxy, save the ACK'ed byte count for this @@ -521,12 +521,12 @@ int orte_iof_svc_fwd_create( } OBJ_RETAIN(pub); fwd->fwd_pub = pub; - opal_output(orte_iof_base.iof_output, "created svc forward, sub origin [%lu,%lu,%lu], tag %d / mask %x, sub target [%lu,%lu,%lu], tag %d / mask %x :::: pub name [%lu,%lu,%lu], tag %d / mask %x\n", - ORTE_NAME_ARGS(&sub->origin_name), sub->origin_tag, + opal_output(orte_iof_base.iof_output, "created svc forward, sub origin %s, tag %d / mask %x, sub target %s, tag %d / mask %x :::: pub name %s, tag %d / mask %x\n", + ORTE_NAME_PRINT(&sub->origin_name), sub->origin_tag, sub->origin_mask, - 
ORTE_NAME_ARGS(&sub->target_name), sub->target_tag, + ORTE_NAME_PRINT(&sub->target_name), sub->target_tag, sub->target_mask, - ORTE_NAME_ARGS(&pub->pub_name), pub->pub_tag, pub->pub_mask); + ORTE_NAME_PRINT(&pub->pub_name), pub->pub_tag, pub->pub_mask); opal_list_append(&sub->sub_forward, &fwd->super); return ORTE_SUCCESS; } diff --git a/orte/mca/ns/base/Makefile.am b/orte/mca/ns/base/Makefile.am index 9d99d64481..dc6b5b2d81 100644 --- a/orte/mca/ns/base/Makefile.am +++ b/orte/mca/ns/base/Makefile.am @@ -24,10 +24,11 @@ libmca_ns_la_SOURCES += \ base/ns_base_close.c \ base/ns_base_select.c \ base/ns_base_open.c \ - base/ns_base_cell_fns.c \ + base/ns_base_node_fns.c \ base/ns_base_job_fns.c \ base/ns_base_vpid_name_fns.c \ base/ns_base_general_fns.c \ + base/ns_base_print_name_args.c \ base/ns_base_diag_fns.c \ base/data_type_support/ns_data_type_compare_fns.c \ base/data_type_support/ns_data_type_copy_fns.c \ diff --git a/orte/mca/ns/base/base.h b/orte/mca/ns/base/base.h index 262d977f18..39591e766b 100644 --- a/orte/mca/ns/base/base.h +++ b/orte/mca/ns/base/base.h @@ -49,6 +49,7 @@ extern "C" { ORTE_DECLSPEC int orte_ns_base_open(void); ORTE_DECLSPEC int orte_ns_base_select(void); ORTE_DECLSPEC int orte_ns_base_close(void); +ORTE_DECLSPEC int orte_ns_base_init_print_args(void); /* * globals that might be needed diff --git a/orte/mca/ns/base/data_type_support/ns_data_type_compare_fns.c b/orte/mca/ns/base/data_type_support/ns_data_type_compare_fns.c index 683f6f1ae7..a55b1490c5 100755 --- a/orte/mca/ns/base/data_type_support/ns_data_type_compare_fns.c +++ b/orte/mca/ns/base/data_type_support/ns_data_type_compare_fns.c @@ -49,18 +49,6 @@ int orte_ns_base_compare_name(orte_process_name_t *value1, * value - a totally useless result, but consistent in behavior. 
*/ - /** check the cellids - if one of them is WILDCARD, then ignore - * this field since anything is okay - */ - if (value1->cellid != ORTE_CELLID_WILDCARD && - value2->cellid != ORTE_CELLID_WILDCARD) { - if (value1->cellid < value2->cellid) { - return ORTE_VALUE2_GREATER; - } else if (value1->cellid > value2->cellid) { - return ORTE_VALUE1_GREATER; - } - } - /** check the jobids - if one of them is WILDCARD, then ignore * this field since anything is okay */ @@ -120,21 +108,6 @@ int orte_ns_base_compare_jobid(orte_jobid_t *value1, return ORTE_EQUAL; } -int orte_ns_base_compare_cellid(orte_cellid_t *value1, - orte_cellid_t *value2, - orte_data_type_t type) -{ - /** if either value is WILDCARD, then return equal */ - if (*value1 == ORTE_CELLID_WILDCARD || - *value2 == ORTE_CELLID_WILDCARD) return ORTE_EQUAL; - - if (*value1 > *value2) return ORTE_VALUE1_GREATER; - - if (*value2 > *value1) return ORTE_VALUE2_GREATER; - - return ORTE_EQUAL; -} - int orte_ns_base_compare_nodeid(orte_nodeid_t *value1, orte_nodeid_t *value2, orte_data_type_t type) diff --git a/orte/mca/ns/base/data_type_support/ns_data_type_copy_fns.c b/orte/mca/ns/base/data_type_support/ns_data_type_copy_fns.c index 9ddfad4a42..e7e829a959 100755 --- a/orte/mca/ns/base/data_type_support/ns_data_type_copy_fns.c +++ b/orte/mca/ns/base/data_type_support/ns_data_type_copy_fns.c @@ -44,25 +44,6 @@ int orte_ns_base_copy_vpid(orte_vpid_t **dest, orte_vpid_t *src, orte_data_type_ return ORTE_SUCCESS; } -/* - * CELLID - */ -int orte_ns_base_copy_cellid(orte_cellid_t **dest, orte_cellid_t *src, orte_data_type_t type) -{ - orte_cellid_t *val; - - val = (orte_cellid_t*)malloc(sizeof(orte_cellid_t)); - if (NULL == val) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - *val = *src; - *dest = val; - - return ORTE_SUCCESS; -} - /* * NODEID */ @@ -114,7 +95,6 @@ int orte_ns_base_copy_name(orte_process_name_t **dest, orte_process_name_t *src, return ORTE_ERR_OUT_OF_RESOURCE; } - 
val->cellid = src->cellid; val->jobid = src->jobid; val->vpid = src->vpid; diff --git a/orte/mca/ns/base/data_type_support/ns_data_type_packing_fns.c b/orte/mca/ns/base/data_type_support/ns_data_type_packing_fns.c index 7fa6f290ac..c69301c356 100644 --- a/orte/mca/ns/base/data_type_support/ns_data_type_packing_fns.c +++ b/orte/mca/ns/base/data_type_support/ns_data_type_packing_fns.c @@ -39,30 +39,9 @@ int orte_ns_base_pack_name(orte_buffer_t *buffer, const void *src, int rc; orte_std_cntr_t i; orte_process_name_t* proc; - orte_cellid_t *cellid; orte_jobid_t *jobid; orte_vpid_t *vpid; - /* collect all the cellids in a contiguous array */ - cellid = (orte_cellid_t*)malloc(num_vals * sizeof(orte_cellid_t)); - if (NULL == cellid) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - proc = (orte_process_name_t*)src; - for (i=0; i < num_vals; i++) { - cellid[i] = proc->cellid; - proc++; - } - /* now pack them in one shot */ - if (ORTE_SUCCESS != (rc = - orte_ns_base_pack_cellid(buffer, cellid, num_vals, ORTE_CELLID))) { - ORTE_ERROR_LOG(rc); - free(cellid); - return rc; - } - free(cellid); - /* collect all the jobids in a contiguous array */ jobid = (orte_jobid_t*)malloc(num_vals * sizeof(orte_jobid_t)); if (NULL == jobid) { @@ -106,23 +85,6 @@ int orte_ns_base_pack_name(orte_buffer_t *buffer, const void *src, return ORTE_SUCCESS; } -/* - * CELLID - */ -int orte_ns_base_pack_cellid(orte_buffer_t *buffer, const void *src, - orte_std_cntr_t num_vals, orte_data_type_t type) -{ - int ret; - - /* Turn around and pack the real type */ - if (ORTE_SUCCESS != ( - ret = orte_dss_pack_buffer(buffer, src, num_vals, ORTE_CELLID_T))) { - ORTE_ERROR_LOG(ret); - } - - return ret; -} - /* * NODEID */ diff --git a/orte/mca/ns/base/data_type_support/ns_data_type_print_fns.c b/orte/mca/ns/base/data_type_support/ns_data_type_print_fns.c index 552dbe9f2a..13d1c5082d 100755 --- a/orte/mca/ns/base/data_type_support/ns_data_type_print_fns.c +++ 
b/orte/mca/ns/base/data_type_support/ns_data_type_print_fns.c @@ -45,10 +45,6 @@ int orte_ns_base_std_print(char **output, char *prefix, void *src, orte_data_typ orte_ns_base_quick_print(output, "ORTE_JOBID", prefix, src, sizeof(orte_jobid_t)); break; - case ORTE_CELLID: - orte_ns_base_quick_print(output, "ORTE_CELLID", prefix, src, sizeof(orte_cellid_t)); - break; - case ORTE_NODEID: orte_ns_base_quick_print(output, "ORTE_NODEID", prefix, src, sizeof(orte_nodeid_t)); break; @@ -73,8 +69,8 @@ int orte_ns_base_print_name(char **output, char *prefix, orte_process_name_t *na asprintf(output, "%sData type: ORTE_PROCESS_NAME\tData Value: NULL", (NULL == prefix ? " " : prefix)); } else { - asprintf(output, "%sData type: ORTE_PROCESS_NAME\tData Value: [%ld,%ld,%ld]", - (NULL == prefix ? " " : prefix), (long)name->cellid, + asprintf(output, "%sData type: ORTE_PROCESS_NAME\tData Value: [%ld,%ld]", + (NULL == prefix ? " " : prefix), (long)name->jobid, (long)name->vpid); } diff --git a/orte/mca/ns/base/data_type_support/ns_data_type_size_fns.c b/orte/mca/ns/base/data_type_support/ns_data_type_size_fns.c index 2abbe6630e..fbe7bb218a 100755 --- a/orte/mca/ns/base/data_type_support/ns_data_type_size_fns.c +++ b/orte/mca/ns/base/data_type_support/ns_data_type_size_fns.c @@ -40,10 +40,6 @@ int orte_ns_base_std_size(size_t *size, void *src, orte_data_type_t type) *size = sizeof(orte_jobid_t); break; - case ORTE_CELLID: - *size = sizeof(orte_cellid_t); - break; - case ORTE_NODEID: *size = sizeof(orte_nodeid_t); break; diff --git a/orte/mca/ns/base/data_type_support/ns_data_type_unpacking_fns.c b/orte/mca/ns/base/data_type_support/ns_data_type_unpacking_fns.c index 9506c04652..b3487ade14 100644 --- a/orte/mca/ns/base/data_type_support/ns_data_type_unpacking_fns.c +++ b/orte/mca/ns/base/data_type_support/ns_data_type_unpacking_fns.c @@ -37,34 +37,16 @@ int orte_ns_base_unpack_name(orte_buffer_t *buffer, void *dest, int rc; orte_std_cntr_t i, num; orte_process_name_t* proc; - 
orte_cellid_t *cellid; orte_jobid_t *jobid; orte_vpid_t *vpid; num = *num_vals; - /* allocate space for all the cellids in a contiguous array */ - cellid = (orte_cellid_t*)malloc(num * sizeof(orte_cellid_t)); - if (NULL == cellid) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - *num_vals = 0; - return ORTE_ERR_OUT_OF_RESOURCE; - } - /* now unpack them in one shot */ - if (ORTE_SUCCESS != (rc = - orte_ns_base_unpack_cellid(buffer, cellid, num_vals, ORTE_CELLID))) { - ORTE_ERROR_LOG(rc); - *num_vals = 0; - free(cellid); - return rc; - } - /* allocate space for all the jobids in a contiguous array */ jobid = (orte_jobid_t*)malloc(num * sizeof(orte_jobid_t)); if (NULL == jobid) { ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); *num_vals = 0; - free(cellid); return ORTE_ERR_OUT_OF_RESOURCE; } /* now unpack them in one shot */ @@ -73,7 +55,6 @@ int orte_ns_base_unpack_name(orte_buffer_t *buffer, void *dest, ORTE_ERROR_LOG(rc); *num_vals = 0; free(jobid); - free(cellid); return rc; } @@ -83,7 +64,6 @@ int orte_ns_base_unpack_name(orte_buffer_t *buffer, void *dest, ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); *num_vals = 0; free(jobid); - free(cellid); return ORTE_ERR_OUT_OF_RESOURCE; } /* now unpack them in one shot */ @@ -93,14 +73,12 @@ int orte_ns_base_unpack_name(orte_buffer_t *buffer, void *dest, *num_vals = 0; free(vpid); free(jobid); - free(cellid); return rc; } - /* build the names from the cellid/jobid/vpid arrays */ + /* build the names from the jobid/vpid arrays */ proc = (orte_process_name_t*)dest; for (i=0; i < num; i++) { - proc->cellid = cellid[i]; proc->jobid = jobid[i]; proc->vpid = vpid[i]; proc++; @@ -109,27 +87,10 @@ int orte_ns_base_unpack_name(orte_buffer_t *buffer, void *dest, /* cleanup */ free(vpid); free(jobid); - free(cellid); return ORTE_SUCCESS; } -/* - * CELLID - */ -int orte_ns_base_unpack_cellid(orte_buffer_t *buffer, void *dest, - orte_std_cntr_t *num_vals, orte_data_type_t type) -{ - int ret; - - /* Turn around and unpack the real type */ - if 
(ORTE_SUCCESS != (ret = orte_dss_unpack_buffer(buffer, dest, num_vals, ORTE_CELLID_T))) { - ORTE_ERROR_LOG(ret); - } - - return ret; -} - /* * NODEID */ diff --git a/orte/mca/ns/base/ns_base_diag_fns.c b/orte/mca/ns/base/ns_base_diag_fns.c index 05df176102..d31661ba10 100644 --- a/orte/mca/ns/base/ns_base_diag_fns.c +++ b/orte/mca/ns/base/ns_base_diag_fns.c @@ -45,13 +45,6 @@ * "not available" functions */ -int -orte_ns_base_dump_cells_not_available(void) -{ - ORTE_ERROR_LOG(ORTE_ERR_UNREACH); - return ORTE_ERR_UNREACH; -} - int orte_ns_base_dump_jobs_not_available(void) { diff --git a/orte/mca/ns/base/ns_base_cell_fns.c b/orte/mca/ns/base/ns_base_node_fns.c similarity index 50% rename from orte/mca/ns/base/ns_base_cell_fns.c rename to orte/mca/ns/base/ns_base_node_fns.c index d4f9d8b543..7a4ca13791 100644 --- a/orte/mca/ns/base/ns_base_cell_fns.c +++ b/orte/mca/ns/base/ns_base_node_fns.c @@ -44,25 +44,7 @@ * "not available" functions */ int -orte_ns_base_create_cellid_not_available(orte_cellid_t *cellid, char *site, char *resource) -{ - *cellid = ORTE_CELLID_INVALID; - ORTE_ERROR_LOG(ORTE_ERR_UNREACH); - return ORTE_ERR_UNREACH; -} - -int -orte_ns_base_get_cell_info_not_available(orte_cellid_t cellid, - char **site, char **resource) -{ - *site = NULL; - *resource = NULL; - ORTE_ERROR_LOG(ORTE_ERR_UNREACH); - return ORTE_ERR_UNREACH; -} - -int -orte_ns_base_create_nodeids_not_available(orte_nodeid_t **nodeids, orte_std_cntr_t *nnodes, orte_cellid_t cellid, char **nodename) +orte_ns_base_create_nodeids_not_available(orte_nodeid_t **nodeids, orte_std_cntr_t *nnodes, char **nodename) { *nodeids = NULL; *nnodes = 0; @@ -71,8 +53,7 @@ orte_ns_base_create_nodeids_not_available(orte_nodeid_t **nodeids, orte_std_cntr } int -orte_ns_base_get_node_info_not_available(char ***nodenames, orte_cellid_t cellid, - orte_std_cntr_t num_nodeids, orte_nodeid_t *nodeids) +orte_ns_base_get_node_info_not_available(char ***nodenames, orte_std_cntr_t num_nodeids, orte_nodeid_t *nodeids) { 
*nodenames = NULL; ORTE_ERROR_LOG(ORTE_ERR_UNREACH); @@ -80,95 +61,6 @@ orte_ns_base_get_node_info_not_available(char ***nodenames, orte_cellid_t cellid } -/**** CELL STRING FUNCTIONS ****/ -int orte_ns_base_get_cellid_string(char **cellid_string, const orte_process_name_t* name) -{ - if (NULL == name) { /* got an error */ - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - *cellid_string = NULL; - return ORTE_ERR_BAD_PARAM; - } - - /* check for wildcard value - handle appropriately */ - if (ORTE_CELLID_WILDCARD == name->cellid) { - *cellid_string = strdup(ORTE_SCHEMA_WILDCARD_STRING); - return ORTE_SUCCESS; - } - - /* check for invalid value - handle appropriately */ - if (ORTE_CELLID_INVALID == name->cellid) { - *cellid_string = strdup(ORTE_SCHEMA_INVALID_STRING); - return ORTE_SUCCESS; - } - - if (0 > asprintf(cellid_string, "%ld", (long) name->cellid)) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - return ORTE_SUCCESS; -} - - -int orte_ns_base_convert_cellid_to_string(char **cellid_string, const orte_cellid_t cellid) -{ - /* check for wildcard value - handle appropriately */ - if (ORTE_CELLID_WILDCARD == cellid) { - *cellid_string = strdup(ORTE_SCHEMA_WILDCARD_STRING); - return ORTE_SUCCESS; - } - - /* check for invalid value - handle appropriately */ - if (ORTE_CELLID_INVALID == cellid) { - *cellid_string = strdup(ORTE_SCHEMA_INVALID_STRING); - return ORTE_SUCCESS; - } - - if (0 > asprintf(cellid_string, "%ld", (long) cellid)) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - return ORTE_SUCCESS; -} - - -int orte_ns_base_convert_string_to_cellid(orte_cellid_t *cellid, const char *cellidstring) -{ - long int tmpint; - - if (NULL == cellidstring) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - *cellid = ORTE_CELLID_INVALID; - return ORTE_ERR_BAD_PARAM; - } - - /** check for wildcard string - handle appropriately */ - if (0 == strcmp(ORTE_SCHEMA_WILDCARD_STRING, cellidstring)) { - *cellid = 
ORTE_CELLID_WILDCARD; - return ORTE_SUCCESS; - } - - /** check for invalid string - handle appropriately */ - if (0 == strcmp(ORTE_SCHEMA_INVALID_STRING, cellidstring)) { - *cellid = ORTE_CELLID_INVALID; - return ORTE_SUCCESS; - } - - tmpint = strtol(cellidstring, NULL, 10); - - if (ORTE_CELLID_MAX >= tmpint && ORTE_CELLID_MIN <= tmpint) { - *cellid = (orte_cellid_t)tmpint; - } else { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - *cellid = ORTE_CELLID_INVALID; - return ORTE_ERR_BAD_PARAM; - } - - return ORTE_SUCCESS; -} - - /**** NODEID STRING FUNCTIONS ****/ int orte_ns_base_convert_nodeid_to_string(char **string, const orte_nodeid_t nodeid) { diff --git a/orte/mca/ns/base/ns_base_open.c b/orte/mca/ns/base/ns_base_open.c index ff01fa3c29..3273b5008f 100644 --- a/orte/mca/ns/base/ns_base_open.c +++ b/orte/mca/ns/base/ns_base_open.c @@ -44,9 +44,9 @@ * globals */ -orte_process_name_t orte_ns_name_wildcard = {ORTE_CELLID_WILDCARD, ORTE_JOBID_WILDCARD, ORTE_VPID_WILDCARD}; -orte_process_name_t orte_ns_name_invalid = {ORTE_CELLID_INVALID, ORTE_JOBID_INVALID, ORTE_VPID_INVALID}; -orte_process_name_t orte_ns_name_my_hnp = {0, 0, 0}; +orte_process_name_t orte_ns_name_wildcard = {ORTE_JOBID_WILDCARD, ORTE_VPID_WILDCARD}; +orte_process_name_t orte_ns_name_invalid = {ORTE_JOBID_INVALID, ORTE_VPID_INVALID}; +orte_process_name_t orte_ns_name_my_hnp = {0, 0}; /* * Global variables @@ -55,12 +55,6 @@ int mca_ns_base_output = -1; mca_ns_base_module_t orte_ns = { /* init */ orte_ns_base_module_init_not_available, - /* cell functions */ - orte_ns_base_create_cellid_not_available, - orte_ns_base_get_cell_info_not_available, - orte_ns_base_get_cellid_string, - orte_ns_base_convert_cellid_to_string, - orte_ns_base_convert_string_to_cellid, /* node functions */ orte_ns_base_create_nodeids_not_available, orte_ns_base_get_node_info_not_available, @@ -95,7 +89,6 @@ mca_ns_base_module_t orte_ns = { /* data type functions */ orte_ns_base_define_data_type_not_available, /* diagnostic functions */ 
- orte_ns_base_dump_cells_not_available, orte_ns_base_dump_jobs_not_available, orte_ns_base_dump_tags_not_available, orte_ns_base_dump_datatypes_not_available, @@ -157,6 +150,12 @@ int orte_ns_base_open(void) } mca_ns_base_output = opal_output_open(&kill_prefix); + /* setup the print_args function */ + if (ORTE_SUCCESS != (rc = orte_ns_base_init_print_args())) { + ORTE_ERROR_LOG(rc); + return rc; + } + /* register the base system types with the DPS */ tmp = ORTE_NAME; if (ORTE_SUCCESS != (rc = orte_dss.register_type(orte_ns_base_pack_name, @@ -200,21 +199,7 @@ int orte_ns_base_open(void) return rc; } - tmp = ORTE_CELLID; - if (ORTE_SUCCESS != (rc = orte_dss.register_type(orte_ns_base_pack_cellid, - orte_ns_base_unpack_cellid, - (orte_dss_copy_fn_t)orte_ns_base_copy_cellid, - (orte_dss_compare_fn_t)orte_ns_base_compare_cellid, - (orte_dss_size_fn_t)orte_ns_base_std_size, - (orte_dss_print_fn_t)orte_ns_base_std_print, - (orte_dss_release_fn_t)orte_ns_base_std_release, - ORTE_DSS_UNSTRUCTURED, - "ORTE_CELLID", &tmp))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* Open up all available components */ + /* Open up all available components */ if (ORTE_SUCCESS != mca_base_components_open("ns", mca_ns_base_output, diff --git a/orte/mca/ns/base/ns_base_print_name_args.c b/orte/mca/ns/base/ns_base_print_name_args.c new file mode 100644 index 0000000000..95becf2fc7 --- /dev/null +++ b/orte/mca/ns/base/ns_base_print_name_args.c @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2006 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. 
+ * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ +#include "orte_config.h" +#include "orte/orte_constants.h" + +#include <stdio.h> +#include <string.h> + +#include "opal/util/output.h" +#include "opal/util/printf.h" +#include "opal/threads/tsd.h" + +#include "orte/mca/errmgr/errmgr.h" + +#include "orte/mca/ns/ns_types.h" +#include "orte/mca/ns/base/base.h" + +#define ORTE_PRINT_NAME_ARGS_MAX_SIZE 50 /* "[%ld,%ld]" with two int32-range ids needs up to 26 bytes */ +#define ORTE_PRINT_NAME_ARGS_NUM_BUFS 16 /* slots per thread, so several printed names can coexist in one output call */ +static opal_tsd_key_t print_args_tsd_key; +char* orte_print_args_null = "NULL"; + +static void +buffer_cleanup(void *value) +{ + if (NULL != value) free(value); +} + +static char* +get_print_name_buffer(void) +{ /* hands out the next slot of the calling thread's ring of print buffers; NULL on failure */ + void *buffer; + unsigned char *ring; + + if (OPAL_SUCCESS != opal_tsd_getspecific(print_args_tsd_key, &buffer)) return NULL; + + if (NULL == buffer) { + buffer = calloc(1+ORTE_PRINT_NAME_ARGS_NUM_BUFS*(ORTE_PRINT_NAME_ARGS_MAX_SIZE+1), sizeof(char)); + if (NULL == buffer || OPAL_SUCCESS != opal_tsd_setspecific(print_args_tsd_key, buffer)) { free(buffer); return NULL; } + } + ring = (unsigned char*) buffer; + ring[0] = (unsigned char) ((ring[0]+1) % ORTE_PRINT_NAME_ARGS_NUM_BUFS); /* byte 0 of the block remembers the slot handed out last */ + return (char*) &ring[1+ring[0]*(ORTE_PRINT_NAME_ARGS_MAX_SIZE+1)]; +} + +char* orte_ns_base_print_name_args(orte_process_name_t *name) +{ + char *print_name_buf = get_print_name_buffer(); + + if (NULL == print_name_buf) { + ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); + return orte_print_args_null; + } + + if (NULL == name) { + snprintf(print_name_buf, ORTE_PRINT_NAME_ARGS_MAX_SIZE, "[NO-NAME]"); + } else { + snprintf(print_name_buf, ORTE_PRINT_NAME_ARGS_MAX_SIZE, "[%ld,%ld]", (long)name->jobid, (long)name->vpid); + } + return print_name_buf; +} + +int +orte_ns_base_init_print_args(void) +{ + return opal_tsd_key_create(&print_args_tsd_key, buffer_cleanup); +} diff --git a/orte/mca/ns/base/ns_base_vpid_name_fns.c b/orte/mca/ns/base/ns_base_vpid_name_fns.c index c210f86c30..a8183e8fc3 100644 --- a/orte/mca/ns/base/ns_base_vpid_name_fns.c +++ b/orte/mca/ns/base/ns_base_vpid_name_fns.c @@ -55,7 +55,7 @@ orte_ns_base_create_my_name_not_available(void) int orte_ns_base_get_proc_name_string(char **name_string, const orte_process_name_t* name) { - char *tmp, *tmp2; + char *tmp; if (NULL == name) { /* got an error
*/ ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); @@ -66,31 +66,22 @@ int orte_ns_base_get_proc_name_string(char **name_string, * corresponding string so we can correctly parse the name string when * it is passed back to us later */ - if (ORTE_CELLID_WILDCARD == name->cellid) { - tmp = strdup(ORTE_SCHEMA_WILDCARD_STRING); - } else if (ORTE_CELLID_INVALID == name->cellid) { - tmp = strdup(ORTE_SCHEMA_INVALID_STRING); - } else { - asprintf(&tmp, "%ld", (long)name->cellid); - } - if (ORTE_JOBID_WILDCARD == name->jobid) { - asprintf(&tmp2, "%s%c%s", tmp, ORTE_SCHEMA_DELIMITER_CHAR, ORTE_SCHEMA_WILDCARD_STRING); + asprintf(&tmp, "%s", ORTE_SCHEMA_WILDCARD_STRING); } else if (ORTE_JOBID_INVALID == name->jobid) { - asprintf(&tmp2, "%s%c%s", tmp, ORTE_SCHEMA_DELIMITER_CHAR, ORTE_SCHEMA_INVALID_STRING); + asprintf(&tmp, "%s", ORTE_SCHEMA_INVALID_STRING); } else { - asprintf(&tmp2, "%s%c%ld", tmp, ORTE_SCHEMA_DELIMITER_CHAR, (long)name->jobid); + asprintf(&tmp, "%ld", (long)name->jobid); } - free(tmp); if (ORTE_VPID_WILDCARD == name->vpid) { - asprintf(name_string, "%s%c%s", tmp2, ORTE_SCHEMA_DELIMITER_CHAR, ORTE_SCHEMA_WILDCARD_STRING); + asprintf(name_string, "%s%c%s", tmp, ORTE_SCHEMA_DELIMITER_CHAR, ORTE_SCHEMA_WILDCARD_STRING); } else if (ORTE_VPID_INVALID == name->vpid) { - asprintf(name_string, "%s%c%s", tmp2, ORTE_SCHEMA_DELIMITER_CHAR, ORTE_SCHEMA_INVALID_STRING); + asprintf(name_string, "%s%c%s", tmp, ORTE_SCHEMA_DELIMITER_CHAR, ORTE_SCHEMA_INVALID_STRING); } else { - asprintf(name_string, "%s%c%ld", tmp2, ORTE_SCHEMA_DELIMITER_CHAR, (long)name->vpid); + asprintf(name_string, "%s%c%ld", tmp, ORTE_SCHEMA_DELIMITER_CHAR, (long)name->vpid); } - free(tmp2); + free(tmp); return ORTE_SUCCESS; } @@ -99,7 +90,6 @@ int orte_ns_base_convert_string_to_process_name(orte_process_name_t **name, const char* name_string) { char *temp, *token; - orte_cellid_t cell; orte_jobid_t job; orte_vpid_t vpid; long int tmpint; @@ -112,45 +102,16 @@ int 
orte_ns_base_convert_string_to_process_name(orte_process_name_t **name, } temp = strdup(name_string); /** copy input string as the strtok process is destructive */ - token = strtok(temp, ORTE_SCHEMA_DELIMITER_STRING); /** get first field -> cellid */ + token = strtok(temp, ORTE_SCHEMA_DELIMITER_STRING); /** get first field -> jobid */ /* check for error */ if (NULL == token) { return ORTE_ERR_BAD_PARAM; } - /* convert to largest possible int - then - * check to ensure it is within range of cellid_t before casting - */ - - /* first, though, check for WILDCARD character - assign - * value accordingly, if found - */ - if (0 == strcmp(token, ORTE_SCHEMA_WILDCARD_STRING)) { - cell = ORTE_CELLID_WILDCARD; - } else if (0 == strcmp(token, ORTE_SCHEMA_INVALID_STRING)) { - cell = ORTE_CELLID_INVALID; - } else { - tmpint = strtol(token, NULL, 10); - if (ORTE_CELLID_MAX >= tmpint && ORTE_CELLID_MIN <= tmpint) { - cell = (orte_cellid_t)tmpint; - } else { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return_code = ORTE_ERR_BAD_PARAM; - goto CLEANUP; - } - } - - token = strtok(NULL, ORTE_SCHEMA_DELIMITER_STRING); /** get next field -> jobid */ - /** convert to largest possible int - then * check to ensure it is within range of jobid_t before casting */ - /* check for error */ - if (NULL == token) { - return ORTE_ERR_BAD_PARAM; - } - /** first, though, check for WILDCARD character - assign * value accordingly, if found */ @@ -198,7 +159,7 @@ int orte_ns_base_convert_string_to_process_name(orte_process_name_t **name, } if (ORTE_SUCCESS != (return_code = - orte_ns_base_create_process_name(name, cell, job, vpid))) { + orte_ns_base_create_process_name(name, job, vpid))) { ORTE_ERROR_LOG(return_code); } @@ -210,7 +171,6 @@ CLEANUP: /**** CREATE PROCESS NAME ****/ int orte_ns_base_create_process_name(orte_process_name_t **name, - orte_cellid_t cell, orte_jobid_t job, orte_vpid_t vpid) { @@ -222,7 +182,6 @@ int orte_ns_base_create_process_name(orte_process_name_t **name, return 
ORTE_ERR_OUT_OF_RESOURCE; } - (*name)->cellid = cell; (*name)->jobid = job; (*name)->vpid = vpid; return ORTE_SUCCESS; @@ -337,16 +296,8 @@ int orte_ns_base_compare_fields(orte_ns_cmp_bitmask_t fields, * function does not actually stand for a wildcard value, but * rather a specific value */ - if (ORTE_NS_CMP_CELLID & fields) { /* check cellid field */ - if (name1->cellid < name2->cellid) { - return ORTE_VALUE2_GREATER; - } else if (name1->cellid > name2->cellid) { - return ORTE_VALUE1_GREATER; - } - } - - /* get here if cellid's are equal, or cellid not being checked */ - /* now check job id */ + + /* check job id */ if (ORTE_NS_CMP_JOBID & fields) { if (name1->jobid < name2->jobid) { @@ -356,8 +307,7 @@ int orte_ns_base_compare_fields(orte_ns_cmp_bitmask_t fields, } } - /* get here if cellid's and jobid's are equal, or neither being checked, - * or cellid not checked and jobid's equal. + /* get here if jobid's are equal, or not being checked * now check vpid */ @@ -370,8 +320,7 @@ int orte_ns_base_compare_fields(orte_ns_cmp_bitmask_t fields, } /* only way to get here is if all fields are being checked and are equal, - * or cellid not checked, but jobid and vpid equal, - * or cellid and jobid not checked, but vpid equal, + * or jobid not checked, but vpid equal, * only vpid being checked, and equal * return that fact */ diff --git a/orte/mca/ns/base/ns_private.h b/orte/mca/ns/base/ns_private.h index 01367c3761..f1c4cb8d4a 100644 --- a/orte/mca/ns/base/ns_private.h +++ b/orte/mca/ns/base/ns_private.h @@ -57,7 +57,6 @@ typedef uint8_t orte_ns_cmd_flag_t; * typedefs above and in ns_types.h */ #define ORTE_NS_CMD ORTE_INT8 -#define ORTE_CELLID_T ORTE_INT32 #define ORTE_NODEID_T ORTE_INT32 #define ORTE_JOBID_T ORTE_INT32 #define ORTE_VPID_T ORTE_INT32 @@ -65,8 +64,6 @@ typedef uint8_t orte_ns_cmd_flag_t; /* * define flag values for remote commands - only used internally */ -#define ORTE_NS_CREATE_CELLID_CMD (int8_t) 1 -#define ORTE_NS_GET_CELL_INFO_CMD (int8_t) 2 
#define ORTE_NS_CREATE_NODEID_CMD (int8_t) 3 #define ORTE_NS_GET_NODE_INFO_CMD (int8_t) 4 #define ORTE_NS_CREATE_JOBID_CMD (int8_t) 5 @@ -79,7 +76,6 @@ typedef uint8_t orte_ns_cmd_flag_t; #define ORTE_NS_GET_PEERS_CMD (int8_t) 12 #define ORTE_NS_DEFINE_DATA_TYPE_CMD (int8_t) 13 -#define ORTE_NS_DUMP_CELLS_CMD (int8_t) 15 #define ORTE_NS_DUMP_JOBIDS_CMD (int8_t) 16 #define ORTE_NS_DUMP_TAGS_CMD (int8_t) 17 #define ORTE_NS_DUMP_DATATYPES_CMD (int8_t) 18 @@ -92,7 +88,6 @@ typedef uint8_t orte_ns_cmd_flag_t; */ ORTE_DECLSPEC int orte_ns_base_create_process_name(orte_process_name_t **name, - orte_cellid_t cell, orte_jobid_t job, orte_vpid_t vpid); @@ -114,19 +109,11 @@ ORTE_DECLSPEC int orte_ns_base_convert_jobid_to_string(char **jobid_string, c ORTE_DECLSPEC int orte_ns_base_convert_string_to_jobid(orte_jobid_t *jobid, const char* jobidstring); -ORTE_DECLSPEC int orte_ns_base_get_cellid_string(char **cellid_string, const orte_process_name_t* name); - -ORTE_DECLSPEC int orte_ns_base_convert_string_to_cellid(orte_cellid_t *cellid, const char *cellidstring); - -ORTE_DECLSPEC int orte_ns_base_convert_cellid_to_string(char **cellid_string, const orte_cellid_t cellid); - ORTE_DECLSPEC int orte_ns_base_get_vpid(orte_vpid_t *vpid, const orte_process_name_t* name); ORTE_DECLSPEC int orte_ns_base_get_jobid(orte_jobid_t *jobid, const orte_process_name_t* name); -ORTE_DECLSPEC int orte_ns_base_get_cellid(orte_cellid_t *cellid, const orte_process_name_t* name); - -ORTE_DECLSPEC int orte_ns_base_convert_string_to_nodeid(orte_nodeid_t *cellid, const char *string); +ORTE_DECLSPEC int orte_ns_base_convert_string_to_nodeid(orte_nodeid_t *nodeid, const char *string); ORTE_DECLSPEC int orte_ns_base_convert_nodeid_to_string(char **nodeid_string, const orte_nodeid_t nodeid); @@ -140,16 +127,10 @@ ORTE_DECLSPEC int orte_ns_base_print_dump(orte_buffer_t *buffer); /* not available functions */ ORTE_DECLSPEC int orte_ns_base_module_init_not_available(void); -ORTE_DECLSPEC int 
orte_ns_base_create_cellid_not_available(orte_cellid_t *cellid, - char *site, char *resource); - -ORTE_DECLSPEC int orte_ns_base_get_cell_info_not_available(orte_cellid_t cellid, - char **site, char **resource); - ORTE_DECLSPEC int orte_ns_base_create_nodeids_not_available(orte_nodeid_t **nodeids, orte_std_cntr_t *nnodes, - orte_cellid_t cellid, char **nodename); + char **nodename); -ORTE_DECLSPEC int orte_ns_base_get_node_info_not_available(char ***nodename, orte_cellid_t cellid, +ORTE_DECLSPEC int orte_ns_base_get_node_info_not_available(char ***nodename, orte_std_cntr_t num_nodes, orte_nodeid_t *nodeids); ORTE_DECLSPEC int orte_ns_base_create_jobid_not_available(orte_jobid_t *jobid, opal_list_t *attrs); @@ -186,7 +167,6 @@ ORTE_DECLSPEC int orte_ns_base_create_my_name_not_available(void); ORTE_DECLSPEC int orte_ns_base_get_peers_not_available(orte_process_name_t **procs, orte_std_cntr_t *num_procs, opal_list_t *attributes); -ORTE_DECLSPEC int orte_ns_base_dump_cells_not_available(void); ORTE_DECLSPEC int orte_ns_base_dump_jobs_not_available(void); ORTE_DECLSPEC int orte_ns_base_dump_tags_not_available(void); ORTE_DECLSPEC int orte_ns_base_dump_datatypes_not_available(void); @@ -198,9 +178,6 @@ ORTE_DECLSPEC int orte_ns_base_ft_event_not_available(int state); ORTE_DECLSPEC int orte_ns_base_pack_name(orte_buffer_t *buffer, const void *src, orte_std_cntr_t num_vals, orte_data_type_t type); -ORTE_DECLSPEC int orte_ns_base_pack_cellid(orte_buffer_t *buffer, const void *src, - orte_std_cntr_t num_vals, orte_data_type_t type); - ORTE_DECLSPEC int orte_ns_base_pack_nodeid(orte_buffer_t *buffer, const void *src, orte_std_cntr_t num_vals, orte_data_type_t type); @@ -213,9 +190,6 @@ ORTE_DECLSPEC int orte_ns_base_pack_vpid(orte_buffer_t *buffer, const void *s ORTE_DECLSPEC int orte_ns_base_unpack_name(orte_buffer_t *buffer, void *dest, orte_std_cntr_t *num_vals, orte_data_type_t type); -ORTE_DECLSPEC int orte_ns_base_unpack_cellid(orte_buffer_t *buffer, void *dest, - 
orte_std_cntr_t *num_vals, orte_data_type_t type); - ORTE_DECLSPEC int orte_ns_base_unpack_nodeid(orte_buffer_t *buffer, void *dest, orte_std_cntr_t *num_vals, orte_data_type_t type); @@ -233,8 +207,6 @@ int orte_ns_base_copy_name(orte_process_name_t **dest, orte_process_name_t *src, int orte_ns_base_copy_vpid(orte_vpid_t **dest, orte_vpid_t *src, orte_data_type_t type); -int orte_ns_base_copy_cellid(orte_cellid_t **dest, orte_cellid_t *src, orte_data_type_t type); - int orte_ns_base_copy_nodeid(orte_nodeid_t **dest, orte_nodeid_t *src, orte_data_type_t type); int orte_ns_base_copy_jobid(orte_jobid_t **dest, orte_jobid_t *src, orte_data_type_t type); @@ -256,10 +228,6 @@ int orte_ns_base_compare_jobid(orte_jobid_t *value1, orte_jobid_t *value2, orte_data_type_t type); -int orte_ns_base_compare_cellid(orte_cellid_t *value1, - orte_cellid_t *value2, - orte_data_type_t type); - int orte_ns_base_compare_nodeid(orte_nodeid_t *value1, orte_nodeid_t *value2, orte_data_type_t type); diff --git a/orte/mca/ns/ns.h b/orte/mca/ns/ns.h index adb2ad6d2e..5941cdb0cd 100644 --- a/orte/mca/ns/ns.h +++ b/orte/mca/ns/ns.h @@ -59,116 +59,25 @@ extern "C" { */ typedef int (*orte_ns_base_module_init_fn_t)(void); -/**** CELL FUNCTIONS ****/ -/** - * Create a new cell id. - * Allocates a new cell id for use by the caller. The function returns an - * existing cellid if the specified site/resource already has been assigned - * one. - * - * @param site The name of the site where the cell is located. - * @param resource The name of the resource associated with this cell (e.g., the name - * of the cluster). - * @param cellid The location where the cellid is to be stored. - * - * @retval ORTE_SUCCESS A cellid was created and returned. - * @retval ORTE_ERROR_VALUE An error code indicative of the problem. 
- * - * @endcode - */ -typedef int (*orte_ns_base_module_create_cellid_fn_t)(orte_cellid_t *cellid, - char *site, char *resource); - -/** - * Get cell info - * Retrieve the site and resource info on a cell. - * - * @param cellid The id of the cell who's info is being requested. - * @param site Returns a pointer to a strdup'd string containing the site name. - * @param resource Returns a pointer to a strdup'd string containg the resource name. - * @retval ORTE_SUCCESS A cellid was created and returned. - * @retval ORTE_ERROR_VALUE An error code indicative of the problem. - */ -typedef int (*orte_ns_base_module_get_cell_info_fn_t)(orte_cellid_t cellid, - char **site, char **resource); - -/** - * Get the cell id as a character string. - * The get_cellid_string() function returns the cell id in a character string - * representation. The string is created by expressing the field in hexadecimal. Memory - * for the string is allocated by the function - releasing that allocation is the - * responsibility of the calling program. - * - * @param *name A pointer to the name structure containing the name to be - * "translated" to a string. - * - * @retval *name_string A pointer to the character string representation of the - * cell id. - * @retval NULL Indicates an error occurred - either no memory could be allocated - * or the caller provided an incorrect name pointer (e.g., NULL). - * - * @code - * cellid-string = ompi_name_server.get_cellid_string(&name) - * @endcode - */ -typedef int (*orte_ns_base_module_get_cellid_string_fn_t)(char **cellid_string, const orte_process_name_t* name); - -/** - * Convert cellid to character string - * Returns the cellid in a character string representation. The string is created - * by expressing the provided cellid in hexadecimal. Memory for the string is - * allocated by the function - releasing that allocation is the responsibility of - * the calling program. - * - * @param cellid The cellid to be converted. 
- * - * @retval *cellid_string A pointer to a character string representation of the cellid. - * @retval NULL Indicates an error occurred - probably no memory could be allocated. - * - * @code - * cellid-string = ompi_name_server.convert_cellid_to_string(cellid); - * @endcode - */ - typedef int (*orte_ns_base_module_convert_cellid_to_string_fn_t)(char **cellid_string, const orte_cellid_t cellid); - - /** - * Convert a string to a cellid. - * Converts a characters string into a cellid. The character string must be a - * hexadecimal representation of a valid cellid. - * - * @param cellidstring The string to be converted. - * - * @retval cellid The resulting cellid - * @retval MCA_NS_BASE_CELLID_MAX String could not be converted. - * - * @code - * cellid = ompi_name_server.convert_string_to_cellid(cellidstring); - * @endcode - */ -typedef int (*orte_ns_base_module_convert_string_to_cellid_fn_t)(orte_cellid_t *cellid, const char *cellidstring); - - /**** NODE FUNCTIONS ****/ /* * Get an array of node id's - * Given the cell and a NULL-terminated array of names of nodes within it, this function assigns an id to represent - * each node within the cell. + * Given a NULL-terminated array of names of nodes within it, this function assigns an id to represent + * each node. */ typedef int (*orte_ns_base_module_create_nodeids_fn_t)(orte_nodeid_t **nodes, orte_std_cntr_t *nnodes, - orte_cellid_t cellid, char **nodename); + char **nodenames); /* * Get node info - * Retrieve the names of an array of nodes given their cellid and nodeids. The cellid - * is required as the nodeids are only unique within a given cell. + * Retrieve the names of an array of nodes given their nodeids. * - * @param cellid The id of the cell of the node. * @param nodeids The ids of the node. * @param nodenames Returns a pointer to a NULL-terminated array of strdup'd strings containing the node names. * @retval ORTE_SUCCESS The nodename was created and returned. 
* @retval ORTE_ERROR_VALUE An error code indicative of the problem. */ -typedef int (*orte_ns_base_module_get_node_info_fn_t)(char ***nodename, orte_cellid_t cellid, +typedef int (*orte_ns_base_module_get_node_info_fn_t)(char ***nodename, orte_std_cntr_t num_nodes, orte_nodeid_t *nodeids); /* @@ -260,8 +169,6 @@ typedef int (*orte_ns_base_module_get_parent_job_fn_t)(orte_jobid_t *parent, ort /** * Reserve a range of process id's. * The reserve_range() function reserves a range of vpid's for the given jobid. - * Note that the cellid does not factor into this request - jobid's span the entire universe, - * hence the cell where the process is currently executing is irrelevant to this request. * * @param jobid The id of the job for which the vpid's are to be reserved. * @param range The number of vpid's to be reserved. The function will find the @@ -350,13 +257,7 @@ typedef int (*orte_ns_base_module_convert_string_to_jobid_fn_t)(orte_jobid_t *jo * The create_process_name() function creates a single process name structure and fills the * fields with the provided values. * - * @param cell The cell for which the process name is intended. Usually, this is - * the id of the cell where the process is initially planning to be spawned. - * @param job The id of the job to which the process will belong. Process id's are - * tracked according to jobid, but not cellid. Thus, two processes - * can have the same process id if and only if they have different jobid's. However, - * two processes in the same jobid cannot have the same process id, regardless - * of whether or not they are in the same cell. + * @param job The id of the job to which the process will belong. * @param vpid The virtual process id for the name. Note that no check is made for uniqueness - * the caller is responsible for ensuring that the requested name is, in fact, unique * by first requesting reservation of an appropriate range of virtual process id's. 
@@ -370,7 +271,6 @@ typedef int (*orte_ns_base_module_convert_string_to_jobid_fn_t)(orte_jobid_t *jo * @endcode */ typedef int (*orte_ns_base_module_create_proc_name_fn_t)(orte_process_name_t **name, - orte_cellid_t cell, orte_jobid_t job, orte_vpid_t vpid); @@ -389,17 +289,14 @@ typedef int (*orte_ns_base_module_create_my_name_fn_t)(void); * Convert a string representation to a process name. * The convert_string_to_process_name() function converts a string representation of a process * name into an Open MPI name structure. The string must be of the proper form - i.e., it - * must be in the form "cellid.jobid.vpid", where each field is expressed in hexadecimal form. + * must be in the form "jobid.vpid", where each field is expressed in hexadecimal form. * * @param *name_string A character string representation of a process name. * - * @retval *name Pointer to an ompi_process_name_t structure containing the name. + * @retval *name Pointer to an orte_process_name_t structure containing the name. * @retval NULL Indicates an error, probably due to inability to allocate memory for * the name structure. * - * @code - * name = ompi_name_server.convert_string_to_process_name(name_string); - * @endcode */ typedef int (*orte_ns_base_module_convert_string_to_process_name_fn_t)(orte_process_name_t **name, const char* name_string); @@ -408,10 +305,7 @@ typedef int (*orte_ns_base_module_convert_string_to_process_name_fn_t)(orte_proc /** * Get the process name as a character string. * The get_proc_name_string() function returns the entire process name in a - * character string representation. The string is created by expressing each - * field in hexadecimal separated by periods, as follows: - * - * sprintf(string_name, "%x.%x.%x", cellid, jobid, vpid) + * character string representation. * * The memory required for the string is allocated by the function - releasing * that allocation is the responsibility of the calling program. 
@@ -436,13 +330,13 @@ typedef int (*orte_ns_base_module_get_proc_name_string_fn_t)(char **name_string, * The compare() function checks the value of the fields in the two * provided names, and returns a value indicating if the first one is less than, greater * than, or equal to the second. The value of each field is compared in a hierarchical - * fashion, with cellid first, followed by jobid and vpid in sequence. The bit-mask + * fashion, with jobid and vpid in sequence. The bit-mask * indicates which fields are to be included in the comparison. Fields not included via the - * bit-mask are ignored. Thus, the caller may request that any combination of the three fields + * bit-mask are ignored. Thus, the caller may request that any combination of the two fields * be included in the comparison. * * @param fields A bit-mask indicating which fields are to be included in the comparison. The - * comparison is performed on a hierarchical basis, with cellid being first, followed by + * comparison is performed on a hierarchical basis, with * jobid and then vpid. Each field can be included separately, thus allowing the caller * to configure the comparison to meet their needs. * @param *name1 A pointer to the first name structure. @@ -454,11 +348,6 @@ typedef int (*orte_ns_base_module_get_proc_name_string_fn_t)(char **name_string, * @retval +1 The indicated fields of the first provided name is greater than the same * fields of the second provided name. * - * The function returns a large negative value if there is an error. - * - * @code - * result = ompi_name_server.compare(bit_mask, &name1, &name2) - * @endcode */ typedef int (*orte_ns_base_module_compare_fields_fn_t)(orte_ns_cmp_bitmask_t fields, const orte_process_name_t* name1, @@ -562,9 +451,9 @@ typedef int (*orte_ns_base_module_define_data_type_fn_t)( * request that all peers for the parent job be returned, for example. * More common options would be to specify a cell or job. 
* - * NOTE The combination of ORTE_CELLID_WILDCARD and ORTE_JOBID_WILDCARD + * NOTE ORTE_JOBID_WILDCARD * in the attribute list will cause the function to return the names of *all* - * processes currently active in the universe. + * processes currently active. * */ typedef int (*orte_ns_base_module_get_peers_fn_t)(orte_process_name_t **procs, @@ -575,8 +464,6 @@ typedef int (*orte_ns_base_module_get_peers_fn_t)(orte_process_name_t **procs, /* * DIAGNOSTIC INTERFACES */ -typedef int (*orte_ns_base_module_dump_cells_fn_t)(void); - typedef int (*orte_ns_base_module_dump_jobs_fn_t)(void); typedef int (*orte_ns_base_module_dump_tags_fn_t)(void); @@ -591,12 +478,6 @@ typedef int (*orte_ns_base_module_ft_event_fn_t)(int state); struct mca_ns_base_module_2_0_0_t { /* init */ orte_ns_base_module_init_fn_t init; - /* cell functions */ - orte_ns_base_module_create_cellid_fn_t create_cellid; - orte_ns_base_module_get_cell_info_fn_t get_cell_info; - orte_ns_base_module_get_cellid_string_fn_t get_cellid_string; - orte_ns_base_module_convert_cellid_to_string_fn_t convert_cellid_to_string; - orte_ns_base_module_convert_string_to_cellid_fn_t convert_string_to_cellid; /** node functions */ orte_ns_base_module_create_nodeids_fn_t create_nodeids; orte_ns_base_module_get_node_info_fn_t get_node_info; @@ -631,7 +512,6 @@ struct mca_ns_base_module_2_0_0_t { /* data type functions */ orte_ns_base_module_define_data_type_fn_t define_data_type; /* diagnostic functions */ - orte_ns_base_module_dump_cells_fn_t dump_cells; orte_ns_base_module_dump_jobs_fn_t dump_jobs; orte_ns_base_module_dump_tags_fn_t dump_tags; orte_ns_base_module_dump_datatypes_fn_t dump_datatypes; diff --git a/orte/mca/ns/ns_types.h b/orte/mca/ns/ns_types.h index 793e07404f..f7f97d8433 100644 --- a/orte/mca/ns/ns_types.h +++ b/orte/mca/ns/ns_types.h @@ -50,7 +50,6 @@ extern "C" { /**** NS ATTRIBUTES ****/ #define ORTE_NS_USE_PARENT "orte-ns-use-parent" #define ORTE_NS_USE_ROOT "orte-ns-use-root" -#define ORTE_NS_USE_CELL 
"orte-ns-use-cell" #define ORTE_NS_USE_JOBID "orte-ns-use-job" #define ORTE_NS_USE_NODE "orte-ns-use-node" #define ORTE_NS_INCLUDE_DESCENDANTS "orte-ns-include-desc" @@ -59,7 +58,6 @@ extern "C" { #define ORTE_NAME_ARGS(n) \ - (long) ((NULL == n) ? (long)-1 : (long)(n)->cellid), \ (long) ((NULL == n) ? (long)-1 : (long)(n)->jobid), \ (long) ((NULL == n) ? (long)-1 : (long)(n)->vpid) @@ -69,7 +67,6 @@ extern "C" { */ #define ORTE_NS_CMP_NONE 0x00 -#define ORTE_NS_CMP_CELLID 0x01 #define ORTE_NS_CMP_JOBID 0x02 #define ORTE_NS_CMP_VPID 0x04 #define ORTE_NS_CMP_ALL 0Xff @@ -86,23 +83,26 @@ extern "C" { * ns_private.h */ typedef orte_std_cntr_t orte_jobid_t; -typedef orte_std_cntr_t orte_cellid_t; typedef orte_std_cntr_t orte_nodeid_t; typedef orte_std_cntr_t orte_vpid_t; typedef uint8_t orte_ns_cmp_bitmask_t; /**< Bit mask for comparing process names */ struct orte_process_name_t { - orte_cellid_t cellid; /**< Cell number */ orte_jobid_t jobid; /**< Job number */ orte_vpid_t vpid; /**< Process number */ }; typedef struct orte_process_name_t orte_process_name_t; + +/* useful define to print name args in output messages */ +ORTE_DECLSPEC extern char* orte_ns_base_print_name_args(orte_process_name_t *name); +#define ORTE_NAME_PRINT(n) \ + orte_ns_base_print_name_args(n) + /* * define maximum value for id's in any field */ -#define ORTE_CELLID_MAX ORTE_STD_CNTR_MAX #define ORTE_JOBID_MAX ORTE_STD_CNTR_MAX #define ORTE_VPID_MAX ORTE_STD_CNTR_MAX #define ORTE_NODEID_MAX ORTE_STD_CNTR_MAX @@ -110,7 +110,6 @@ typedef struct orte_process_name_t orte_process_name_t; /* * define minimum value for id's in any field */ -#define ORTE_CELLID_MIN ORTE_STD_CNTR_MIN #define ORTE_JOBID_MIN ORTE_STD_CNTR_MIN #define ORTE_VPID_MIN ORTE_STD_CNTR_MIN #define ORTE_NODEID_MIN ORTE_STD_CNTR_MIN @@ -118,7 +117,6 @@ typedef struct orte_process_name_t orte_process_name_t; /* * define invalid values */ -#define ORTE_CELLID_INVALID (ORTE_CELLID_MIN + 1) #define ORTE_JOBID_INVALID (ORTE_JOBID_MIN + 
1) #define ORTE_VPID_INVALID (ORTE_VPID_MIN + 1) #define ORTE_NODEID_INVALID (ORTE_NODEID_MIN + 1) @@ -126,7 +124,6 @@ typedef struct orte_process_name_t orte_process_name_t; /* * define wildcard values (should be -1) */ -#define ORTE_CELLID_WILDCARD -1 #define ORTE_JOBID_WILDCARD -1 #define ORTE_VPID_WILDCARD -1 #define ORTE_NODEID_WILDCARD -1 @@ -152,7 +149,6 @@ ORTE_DECLSPEC extern orte_process_name_t orte_ns_name_my_hnp; /** instantiated * @param name */ #define ORTE_PROCESS_NAME_HTON(n) \ - n.cellid = htonl(n.cellid); \ n.jobid = htonl(n.jobid); \ n.vpid = htonl(n.vpid); @@ -162,7 +158,6 @@ ORTE_DECLSPEC extern orte_process_name_t orte_ns_name_my_hnp; /** instantiated * @param name */ #define ORTE_PROCESS_NAME_NTOH(n) \ - n.cellid = ntohl(n.cellid); \ n.jobid = ntohl(n.jobid); \ n.vpid = ntohl(n.vpid); diff --git a/orte/mca/ns/proxy/ns_proxy.h b/orte/mca/ns/proxy/ns_proxy.h index f3040a8c25..c651e53926 100644 --- a/orte/mca/ns/proxy/ns_proxy.h +++ b/orte/mca/ns/proxy/ns_proxy.h @@ -75,8 +75,6 @@ int orte_ns_proxy_finalize(void); typedef struct { size_t max_size, block_size; int debug; - orte_cellid_t num_cells; - orte_pointer_array_t *cells; orte_pointer_array_t *tags; orte_rml_tag_t num_tags; orte_pointer_array_t *dts; @@ -95,14 +93,9 @@ extern orte_ns_proxy_globals_t orte_ns_proxy; /* * proxy function prototypes */ -int orte_ns_proxy_create_cellid(orte_cellid_t *cellid, char *site, char *resource); +int orte_ns_proxy_create_nodeids(orte_nodeid_t **nodeids, orte_std_cntr_t *nnodes, char **nodenames); -int orte_ns_proxy_get_cell_info(orte_cellid_t cellid, char **site, char **resource); - -int orte_ns_proxy_create_nodeids(orte_nodeid_t **nodeids, orte_std_cntr_t *nnodes, - orte_cellid_t cellid, char **nodenames); - -int orte_ns_proxy_get_node_info(char ***nodename, orte_cellid_t cellid, orte_std_cntr_t num_nodes, orte_nodeid_t *nodeids); +int orte_ns_proxy_get_node_info(char ***nodename, orte_std_cntr_t num_nodes, orte_nodeid_t *nodeids); int 
orte_ns_proxy_create_jobid(orte_jobid_t *jobid, opal_list_t *attrs); @@ -134,8 +127,6 @@ int orte_ns_proxy_create_my_name(void); /* * Diagnostic functions */ -int orte_ns_proxy_dump_cells(void); - int orte_ns_proxy_dump_jobs(void); int orte_ns_proxy_dump_tags(void); diff --git a/orte/mca/ns/proxy/ns_proxy_cell_fns.c b/orte/mca/ns/proxy/ns_proxy_cell_fns.c index 4fbcd578cc..83474b8c29 100644 --- a/orte/mca/ns/proxy/ns_proxy_cell_fns.c +++ b/orte/mca/ns/proxy/ns_proxy_cell_fns.c @@ -45,186 +45,7 @@ * functions */ -int orte_ns_proxy_create_cellid(orte_cellid_t *cellid, char *site, char *resource) -{ - orte_buffer_t* cmd; - orte_buffer_t* answer; - orte_ns_cmd_flag_t command; - orte_std_cntr_t count; - int rc; - - OPAL_TRACE(1); - - command = ORTE_NS_CREATE_CELLID_CMD; - - cmd = OBJ_NEW(orte_buffer_t); - if (cmd == NULL) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &command, 1, ORTE_NS_CMD))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - return rc; - } - - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, cellid, 1, ORTE_CELLID))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - return rc; - } - - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &site, 1, ORTE_STRING))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - return rc; - } - - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &resource, 1, ORTE_STRING))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - return rc; - } - - if (0 > orte_rml.send_buffer(ORTE_NS_MY_REPLICA, cmd, ORTE_RML_TAG_NS, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(cmd); - return ORTE_ERR_COMM_FAILURE; - } - OBJ_RELEASE(cmd); - - answer = OBJ_NEW(orte_buffer_t); - if(answer == NULL) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - if (0 > orte_rml.recv_buffer(ORTE_NS_MY_REPLICA, answer, ORTE_RML_TAG_NS, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(answer); - return ORTE_ERR_COMM_FAILURE; - } - - count = 
1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(answer, &command, &count, ORTE_NS_CMD))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - return rc; - } - - if (ORTE_NS_CREATE_CELLID_CMD != command) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(answer); - return ORTE_ERR_COMM_FAILURE; - } - - count = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(answer, cellid, &count, ORTE_CELLID))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - return rc; - } - OBJ_RELEASE(answer); - - OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); - return ORTE_SUCCESS; -} - - -int orte_ns_proxy_get_cell_info(orte_cellid_t cellid, - char **site, char **resource) -{ - orte_buffer_t* cmd; - orte_buffer_t* answer; - orte_ns_cmd_flag_t command; - orte_std_cntr_t count; - int rc; - - OPAL_TRACE(1); - - command = ORTE_NS_GET_CELL_INFO_CMD; - - cmd = OBJ_NEW(orte_buffer_t); - if (cmd == NULL) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &command, 1, ORTE_NS_CMD))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); - return rc; - } - - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &cellid, 1, ORTE_CELLID))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); - return rc; - } - - if (0 > orte_rml.send_buffer(ORTE_NS_MY_REPLICA, cmd, ORTE_RML_TAG_NS, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(cmd); - OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); - return ORTE_ERR_COMM_FAILURE; - } - OBJ_RELEASE(cmd); - - answer = OBJ_NEW(orte_buffer_t); - if(answer == NULL) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - if (0 > orte_rml.recv_buffer(ORTE_NS_MY_REPLICA, answer, ORTE_RML_TAG_NS, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(answer); - OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); - return 
ORTE_ERR_COMM_FAILURE; - } - - count = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(answer, &command, &count, ORTE_NS_CMD))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); - return rc; - } - - if (ORTE_NS_GET_CELL_INFO_CMD != command) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(answer); - OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); - return ORTE_ERR_COMM_FAILURE; - } - - count = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(answer, site, &count, ORTE_STRING))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); - return rc; - } - - count = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(answer, resource, &count, ORTE_STRING))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(answer); - OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); - return rc; - } - - OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); - return ORTE_SUCCESS; -} - -int orte_ns_proxy_create_nodeids(orte_nodeid_t **nodeids, orte_std_cntr_t *nnodes, - orte_cellid_t cellid, char **nodenames) +int orte_ns_proxy_create_nodeids(orte_nodeid_t **nodeids, orte_std_cntr_t *nnodes, char **nodenames) { orte_buffer_t* cmd; orte_buffer_t* answer; @@ -248,12 +69,6 @@ int orte_ns_proxy_create_nodeids(orte_nodeid_t **nodeids, orte_std_cntr_t *nnode return rc; } - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &cellid, 1, ORTE_CELLID))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - return rc; - } - count = opal_argv_count(nodenames); if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &count, 1, ORTE_STD_CNTR))) { ORTE_ERROR_LOG(rc); @@ -323,8 +138,7 @@ int orte_ns_proxy_create_nodeids(orte_nodeid_t **nodeids, orte_std_cntr_t *nnode return ORTE_SUCCESS; } -int orte_ns_proxy_get_node_info(char ***nodenames, orte_cellid_t cellid, - orte_std_cntr_t num_nodes, orte_nodeid_t *nodeids) +int orte_ns_proxy_get_node_info(char ***nodenames, orte_std_cntr_t num_nodes, orte_nodeid_t *nodeids) { orte_buffer_t* cmd; orte_buffer_t* answer; @@ -350,13 +164,6 @@ int 
orte_ns_proxy_get_node_info(char ***nodenames, orte_cellid_t cellid, return rc; } - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &cellid, 1, ORTE_CELLID))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); - return rc; - } - if (ORTE_SUCCESS != (rc = orte_dss.pack(cmd, &num_nodes, 1, ORTE_STD_CNTR))) { ORTE_ERROR_LOG(rc); OBJ_RELEASE(cmd); diff --git a/orte/mca/ns/proxy/ns_proxy_component.c b/orte/mca/ns/proxy/ns_proxy_component.c index 2f6ac3e4ba..8d5e47c4d9 100644 --- a/orte/mca/ns/proxy/ns_proxy_component.c +++ b/orte/mca/ns/proxy/ns_proxy_component.c @@ -71,12 +71,6 @@ mca_ns_base_component_t mca_ns_proxy_component = { static mca_ns_base_module_t orte_ns_proxy_module = { /* init */ orte_ns_proxy_module_init, - /* cell functions */ - orte_ns_proxy_create_cellid, - orte_ns_proxy_get_cell_info, - orte_ns_base_get_cellid_string, - orte_ns_base_convert_cellid_to_string, - orte_ns_base_convert_string_to_cellid, /** node functions */ orte_ns_proxy_create_nodeids, orte_ns_proxy_get_node_info, @@ -111,7 +105,6 @@ static mca_ns_base_module_t orte_ns_proxy_module = { /* data type functions */ orte_ns_proxy_define_data_type, /* diagnostic functions */ - orte_ns_proxy_dump_cells, orte_ns_proxy_dump_jobs, orte_ns_proxy_dump_tags, orte_ns_proxy_dump_datatypes, @@ -232,17 +225,6 @@ mca_ns_base_module_t* orte_ns_proxy_init(int *priority) return NULL; } - /* initialize the cell info tracker */ - if (ORTE_SUCCESS != (rc = orte_pointer_array_init(&(orte_ns_proxy.cells), - (orte_std_cntr_t)orte_ns_proxy.block_size, - (orte_std_cntr_t)orte_ns_proxy.max_size, - (orte_std_cntr_t)orte_ns_proxy.block_size))) { - ORTE_ERROR_LOG(rc); - return NULL; - } - orte_ns_proxy.num_cells = 0; - - /* initialize the taglist */ if (ORTE_SUCCESS != (rc = orte_pointer_array_init(&(orte_ns_proxy.tags), diff --git a/orte/mca/ns/proxy/ns_proxy_diag_fns.c b/orte/mca/ns/proxy/ns_proxy_diag_fns.c index c75ed11043..c1a671ff6e 100644 --- 
a/orte/mca/ns/proxy/ns_proxy_diag_fns.c +++ b/orte/mca/ns/proxy/ns_proxy_diag_fns.c @@ -40,66 +40,6 @@ /* * DIAGNOSTIC functions */ -int orte_ns_proxy_dump_cells(void) -{ - orte_buffer_t cmd; - orte_buffer_t answer; - orte_ns_cmd_flag_t command; - orte_std_cntr_t count; - int rc; - - command = ORTE_NS_DUMP_CELLS_CMD; - - OPAL_THREAD_LOCK(&orte_ns_proxy.mutex); - - /* dump name service replica cell tracker */ - OBJ_CONSTRUCT(&cmd, orte_buffer_t); - if (ORTE_SUCCESS != (rc = orte_dss.pack(&cmd, &command, 1, ORTE_NS_CMD))) { - ORTE_ERROR_LOG(rc); - OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); - OBJ_DESTRUCT(&cmd); - return rc; - } - - if (0 > orte_rml.send_buffer(ORTE_NS_MY_REPLICA, &cmd, ORTE_RML_TAG_NS, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_DESTRUCT(&cmd); - OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); - return ORTE_ERR_COMM_FAILURE; - } - OBJ_DESTRUCT(&cmd); - - OBJ_CONSTRUCT(&answer, orte_buffer_t); - if (0 > orte_rml.recv_buffer(ORTE_NS_MY_REPLICA, &answer, ORTE_RML_TAG_NS, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_DESTRUCT(&answer); - OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); - return ORTE_ERR_COMM_FAILURE; - } - - count = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(&answer, &command, &count, ORTE_NS_CMD))) { - ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(&answer); - return rc; - } - - if (ORTE_NS_DUMP_CELLS_CMD != command) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_DESTRUCT(&answer); - return ORTE_ERR_COMM_FAILURE; - } - - if (ORTE_SUCCESS != (rc = orte_ns_base_print_dump(&answer))) { - ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(&answer); - return rc; - } - - return ORTE_SUCCESS; -} - - int orte_ns_proxy_dump_jobs(void) { orte_buffer_t cmd; @@ -220,8 +160,8 @@ int orte_ns_proxy_dump_tags(void) } /* dump local tag tracker */ - opal_output(mca_ns_base_output, "\n\n[%lu,%lu,%lu] Dump of Local Tag Tracker\n", - ORTE_NAME_ARGS(orte_process_info.my_name)); + opal_output(mca_ns_base_output, "\n\n%s Dump of Local Tag Tracker\n", + 
ORTE_NAME_PRINT(orte_process_info.my_name)); ptr = (orte_ns_proxy_tagitem_t**)(orte_ns_proxy.tags)->addr; for (i=0, j=0; j < orte_ns_proxy.num_tags && i < (orte_ns_proxy.tags)->size; i++) { @@ -295,8 +235,8 @@ int orte_ns_proxy_dump_datatypes(void) } /* dump local datatype tracker */ - opal_output(mca_ns_base_output, "\n\n[%lu,%lu,%lu] Dump of Local Datatype Tracker\n", - ORTE_NAME_ARGS(orte_process_info.my_name)); + opal_output(mca_ns_base_output, "\n\n%s Dump of Local Datatype Tracker\n", + ORTE_NAME_PRINT(orte_process_info.my_name)); ptr = (orte_ns_proxy_dti_t**)(orte_ns_proxy.dts)->addr; for (i=0, j=0; j < orte_ns_proxy.num_dts && i < (orte_ns_proxy.dts)->size; i++) { diff --git a/orte/mca/ns/proxy/ns_proxy_general_fns.c b/orte/mca/ns/proxy/ns_proxy_general_fns.c index 3d5dc9f8eb..1cd3a625dc 100644 --- a/orte/mca/ns/proxy/ns_proxy_general_fns.c +++ b/orte/mca/ns/proxy/ns_proxy_general_fns.c @@ -47,7 +47,6 @@ int orte_ns_proxy_get_peers(orte_process_name_t **procs, orte_buffer_t* answer; orte_ns_cmd_flag_t command; orte_std_cntr_t count, nprocs, i; - orte_cellid_t *cptr; orte_attribute_t *attr; int rc; @@ -59,35 +58,12 @@ int orte_ns_proxy_get_peers(orte_process_name_t **procs, *procs = NULL; *num_procs = 0; - /* check the attributes to see if USE_JOB or USE_CELL has been set. If not, then this is + /* check the attributes to see if USE_JOB has been set. If not, then this is * a request for my own job peers - process that one locally */ - /* if the cell is given AND it matches my own, then we can process this - * quickly. Otherwise, we have to do some more work. - * - * RHC: when we go multi-cell, we need a way to find all the cells upon - * which a job is executing so we can make this work! 
- */ - if (NULL != (attr = orte_rmgr.find_attribute(attrs, ORTE_NS_USE_CELL))) { - if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&cptr, attr->value, ORTE_CELLID))) { - ORTE_ERROR_LOG(rc); - OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); - return rc; - } - if (*cptr != ORTE_PROC_MY_NAME->cellid && *cptr != ORTE_CELLID_WILDCARD) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_IMPLEMENTED); - OPAL_THREAD_UNLOCK(&orte_ns_proxy.mutex); - return ORTE_ERR_NOT_IMPLEMENTED; - } - } - if (NULL == (attr = orte_rmgr.find_attribute(attrs, ORTE_NS_USE_JOBID))) { - /* get my own job peers, assuming all are on this cell - process here - * - * RHC: This is a bad assumption. When we go multi-cell, we are going to have to process - * get peer requests solely on the HNP since we won't know the cellid otherwise - */ + /* get my own job peers */ *procs = (orte_process_name_t*)malloc(orte_process_info.num_procs * sizeof(orte_process_name_t)); if (NULL == *procs) { ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); @@ -96,7 +72,6 @@ int orte_ns_proxy_get_peers(orte_process_name_t **procs, } for (i=0; i < orte_process_info.num_procs; i++) { - (*procs)[i].cellid = ORTE_PROC_MY_NAME->cellid; (*procs)[i].jobid = ORTE_PROC_MY_NAME->jobid; (*procs)[i].vpid = orte_process_info.vpid_start + i; } diff --git a/orte/mca/ns/replica/ns_replica.c b/orte/mca/ns/replica/ns_replica.c index 160509496f..e69de29bb2 100644 --- a/orte/mca/ns/replica/ns_replica.c +++ b/orte/mca/ns/replica/ns_replica.c @@ -1,452 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file: - * - */ -#include "orte_config.h" - -#include -#include - -#include "opal/threads/mutex.h" -#include "opal/util/output.h" -#include "opal/util/trace.h" - -#include "orte/dss/dss.h" -#include "orte/mca/errmgr/errmgr.h" - -#include "orte/mca/ns/base/base.h" -#include "orte/mca/ns/base/ns_private.h" -#include "ns_replica.h" - -/** - * globals - */ -#define NS_REPLICA_MAX_STRING_SIZE 256 - -/* - * DIAGNOSTIC functions - */ -int orte_ns_replica_dump_cells(void) -{ - orte_buffer_t buffer; - int rc; - - OBJ_CONSTRUCT(&buffer, orte_buffer_t); - if (ORTE_SUCCESS != (rc = orte_ns_replica_dump_cells_fn(&buffer))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - if (ORTE_SUCCESS != (rc = orte_ns_base_print_dump(&buffer))) { - ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(&buffer); - return rc; - } - - OBJ_DESTRUCT(&buffer); - return ORTE_SUCCESS; -} - -int orte_ns_replica_dump_cells_fn(orte_buffer_t *buffer) -{ - orte_std_cntr_t i; - orte_cellid_t j; - orte_ns_replica_cell_tracker_t **cell; - char tmp_out[NS_REPLICA_MAX_STRING_SIZE], *tmp; - int rc; - - OPAL_THREAD_LOCK(&orte_ns_replica.mutex); - - tmp = tmp_out; - snprintf(tmp, NS_REPLICA_MAX_STRING_SIZE, "Dump of Name Service Cell Tracker\n"); - if (ORTE_SUCCESS != (rc = orte_dss.pack(buffer, &tmp, 1, ORTE_STRING))) { - ORTE_ERROR_LOG(rc); - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - return rc; - } - cell = (orte_ns_replica_cell_tracker_t**)(orte_ns_replica.cells)->addr; - for (i=0, j=0; j < orte_ns_replica.num_cells && - i < (orte_ns_replica.cells)->size; i++) { - if (NULL != cell[i]) { - j++; - snprintf(tmp, NS_REPLICA_MAX_STRING_SIZE, "Num: %lu\tCell: %lu\n", - (unsigned long)j, (unsigned long)cell[i]->cell); - if (ORTE_SUCCESS != (rc = orte_dss.pack(buffer, &tmp, 1, ORTE_STRING))) { - ORTE_ERROR_LOG(rc); - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - return rc; - } - snprintf(tmp, NS_REPLICA_MAX_STRING_SIZE, "\tSite: %s\n\tResource: %s\n", - 
cell[i]->site, cell[i]->resource); - if (ORTE_SUCCESS != (rc = orte_dss.pack(buffer, &tmp, 1, ORTE_STRING))) { - ORTE_ERROR_LOG(rc); - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - return rc; - } - } - } - - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - - return ORTE_SUCCESS; -} - - -int orte_ns_replica_dump_jobs(void) -{ - orte_buffer_t buffer; - int rc; - - OBJ_CONSTRUCT(&buffer, orte_buffer_t); - - if (ORTE_SUCCESS != (rc = orte_ns_replica_dump_jobs_fn(&buffer))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - if (ORTE_SUCCESS != (rc = orte_ns_base_print_dump(&buffer))) { - ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(&buffer); - return rc; - } - - OBJ_DESTRUCT(&buffer); - return ORTE_SUCCESS; -} - -int orte_ns_replica_dump_jobs_fn(orte_buffer_t *buffer) -{ - orte_std_cntr_t i; - orte_cellid_t j; - orte_ns_replica_jobid_tracker_t **ptr; - char tmp_out[NS_REPLICA_MAX_STRING_SIZE], *tmp; - int rc; - - OPAL_THREAD_LOCK(&orte_ns_replica.mutex); - - tmp = tmp_out; - snprintf(tmp, NS_REPLICA_MAX_STRING_SIZE, "Dump of Name Service Jobid Tracker\n"); - if (ORTE_SUCCESS != (rc = orte_dss.pack(buffer, &tmp, 1, ORTE_STRING))) { - ORTE_ERROR_LOG(rc); - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - return rc; - } - ptr = (orte_ns_replica_jobid_tracker_t**)(orte_ns_replica.jobids)->addr; - for (i=0, j=0; j < orte_ns_replica.num_jobids && - i < (orte_ns_replica.jobids)->size; i++) { - if (NULL != ptr[i]) { - j++; - snprintf(tmp, NS_REPLICA_MAX_STRING_SIZE, "Num: %lu\tJobid: %lu\tNext vpid: %lu\n", - (unsigned long)j, (unsigned long)ptr[i]->jobid, - (unsigned long)ptr[i]->next_vpid); - if (ORTE_SUCCESS != (rc = orte_dss.pack(buffer, &tmp, 1, ORTE_STRING))) { - ORTE_ERROR_LOG(rc); - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - return rc; - } - } - } - - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - - return ORTE_SUCCESS; -} - - -int orte_ns_replica_dump_tags(void) -{ - orte_buffer_t buffer; - int rc; - - OBJ_CONSTRUCT(&buffer, orte_buffer_t); - if (ORTE_SUCCESS != (rc = 
orte_ns_replica_dump_tags_fn(&buffer))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - if (ORTE_SUCCESS != (rc = orte_ns_base_print_dump(&buffer))) { - ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(&buffer); - return rc; - } - - OBJ_DESTRUCT(&buffer); - return ORTE_SUCCESS; -} - - -int orte_ns_replica_dump_tags_fn(orte_buffer_t *buffer) -{ - orte_std_cntr_t i; - orte_rml_tag_t j; - orte_ns_replica_tagitem_t **ptr; - char tmp_out[NS_REPLICA_MAX_STRING_SIZE], *tmp; - int rc; - - OPAL_THREAD_LOCK(&orte_ns_replica.mutex); - - tmp = tmp_out; - snprintf(tmp, NS_REPLICA_MAX_STRING_SIZE, "Dump of Name Service RML Tag Tracker\n"); - if (ORTE_SUCCESS != (rc = orte_dss.pack(buffer, &tmp, 1, ORTE_STRING))) { - ORTE_ERROR_LOG(rc); - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - return rc; - } - ptr = (orte_ns_replica_tagitem_t**)(orte_ns_replica.tags)->addr; - for (i=0, j=0; j < orte_ns_replica.num_tags && - i < (orte_ns_replica.tags)->size; i++) { - if (NULL != ptr[i]) { - j++; - snprintf(tmp, NS_REPLICA_MAX_STRING_SIZE, "Num: %lu\tTag id: %lu\tName: %s\n", - (unsigned long)j, (unsigned long)ptr[i]->tag, ptr[i]->name); - if (ORTE_SUCCESS != (rc = orte_dss.pack(buffer, &tmp, 1, ORTE_STRING))) { - ORTE_ERROR_LOG(rc); - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - return rc; - } - } - } - - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - - return ORTE_SUCCESS; -} - - -int orte_ns_replica_dump_datatypes(void) -{ - orte_buffer_t buffer; - int rc; - - OBJ_CONSTRUCT(&buffer, orte_buffer_t); - if (ORTE_SUCCESS != (rc = orte_ns_replica_dump_datatypes_fn(&buffer))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - if (ORTE_SUCCESS != (rc = orte_ns_base_print_dump(&buffer))) { - ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(&buffer); - return rc; - } - - OBJ_DESTRUCT(&buffer); - return ORTE_SUCCESS; -} - -int orte_ns_replica_dump_datatypes_fn(orte_buffer_t *buffer) -{ - orte_std_cntr_t i, j; - orte_ns_replica_dti_t **ptr; - char tmp_out[NS_REPLICA_MAX_STRING_SIZE], *tmp; - int rc; - - 
OPAL_THREAD_LOCK(&orte_ns_replica.mutex); - - tmp = tmp_out; - snprintf(tmp, NS_REPLICA_MAX_STRING_SIZE, "Dump of Name Service Datatype Tracker\n"); - if (ORTE_SUCCESS != (rc = orte_dss.pack(buffer, &tmp, 1, ORTE_STRING))) { - ORTE_ERROR_LOG(rc); - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - return rc; - } - ptr = (orte_ns_replica_dti_t**)(orte_ns_replica.dts)->addr; - for (i=0, j=0; j < orte_ns_replica.num_dts && - i < (orte_ns_replica.dts)->size; i++) { - if (NULL != ptr[i]) { - j++; - snprintf(tmp, NS_REPLICA_MAX_STRING_SIZE, "Num: %lu\tDatatype id: %lu\tName: %s\n", - (unsigned long)j, (unsigned long)ptr[i]->id, ptr[i]->name); - if (ORTE_SUCCESS != (rc = orte_dss.pack(buffer, &tmp, 1, ORTE_STRING))) { - ORTE_ERROR_LOG(rc); - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - return rc; - } - } - } - - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - - return ORTE_SUCCESS; -} - - -/* - * TAG SERVER functions - */ -int orte_ns_replica_assign_rml_tag(orte_rml_tag_t *tag, - char *name) -{ - orte_ns_replica_tagitem_t *tagitem, **tags; - orte_std_cntr_t i; - orte_rml_tag_t j; - int rc; - - OPAL_THREAD_LOCK(&orte_ns_replica.mutex); - - if (NULL != name) { - /* see if this name is already in list - if so, return tag */ - tags = (orte_ns_replica_tagitem_t**)orte_ns_replica.tags->addr; - for (i=0, j=0; j < orte_ns_replica.num_tags && - i < (orte_ns_replica.tags)->size; i++) { - if (NULL != tags[i]) { - j++; - if (tags[i]->name != NULL && - 0 == strcmp(name, tags[i]->name)) { /* found name on list */ - *tag = tags[i]->tag; - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - return ORTE_SUCCESS; - } - } - } - } - - /* not in list or not provided, so allocate next tag */ - *tag = ORTE_RML_TAG_MAX; - - /* check if tag is available - need to do this since the tag type - * is probably not going to be a orte_std_cntr_t, so we cannot just rely - * on the pointer_array's size limits to protect us. 
NOTE: need to - * reserve ORTE_RML_TAG_MAX as an invalid value, so can't let - * num_tags get there - */ - if (ORTE_RML_TAG_MAX-2 < orte_ns_replica.num_tags) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - tagitem = OBJ_NEW(orte_ns_replica_tagitem_t); - if (NULL == tagitem) { /* out of memory */ - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - return ORTE_ERR_OUT_OF_RESOURCE; - } - if (ORTE_SUCCESS != (rc = orte_pointer_array_add(&i, - orte_ns_replica.tags, tagitem))) { - ORTE_ERROR_LOG(rc); - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - return rc; - } - tagitem->tag = orte_ns_replica.num_tags + ORTE_RML_TAG_DYNAMIC; - (orte_ns_replica.num_tags)++; - if (NULL != name) { /* provided - can look it up later */ - tagitem->name = strdup(name); - } else { - tagitem->name = NULL; - } - - *tag = tagitem->tag; - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - return ORTE_SUCCESS; -} - - -/* - * DATA TYPE SERVER functions - */ -int orte_ns_replica_define_data_type(const char *name, - orte_data_type_t *type) -{ - orte_ns_replica_dti_t **dti, *dtip; - orte_std_cntr_t i, j; - int rc; - - if (NULL == name || 0 < *type) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return ORTE_ERR_BAD_PARAM; - } - - OPAL_THREAD_LOCK(&orte_ns_replica.mutex); - - dti = (orte_ns_replica_dti_t**)orte_ns_replica.dts->addr; - for (i=0, j=0; j < orte_ns_replica.num_dts && - i < orte_ns_replica.dts->size; i++) { - if (NULL != dti[i]) { - j++; - if (dti[i]->name != NULL && - 0 == strcmp(name, dti[i]->name)) { /* found name on list */ - *type = dti[i]->id; - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - return ORTE_SUCCESS; - } - } - } - - /* not in list or not provided, so allocate next id */ - *type = ORTE_DSS_ID_MAX; - - /* check if id is available - need to do this since the data type - * is probably not going to be a orte_std_cntr_t, so we cannot just rely - * on the 
pointer_array's size limits to protect us. - */ - if (ORTE_DSS_ID_MAX-2 < orte_ns_replica.num_dts) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - dtip = OBJ_NEW(orte_ns_replica_dti_t); - if (NULL == dtip) { /* out of memory */ - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - return ORTE_ERR_OUT_OF_RESOURCE; - } - dtip->name = strdup(name); - if (ORTE_SUCCESS != (rc = orte_pointer_array_add(&i, - orte_ns_replica.dts, dtip))) { - ORTE_ERROR_LOG(rc); - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - return rc; - } - dtip->id = orte_ns_replica.num_dts; - (orte_ns_replica.num_dts)++; - - *type = dtip->id; - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - return ORTE_SUCCESS; -} - - - -/* - * NAME functions - */ -int orte_ns_replica_create_my_name(void) -{ - orte_jobid_t jobid; - orte_vpid_t vpid; - int rc; - - if (ORTE_SUCCESS != (rc = orte_ns.create_jobid(&jobid))) { - ORTE_ERROR_LOG(rc); - return rc; - } - if (ORTE_SUCCESS != (rc = orte_ns.reserve_range(jobid, 1, &vpid))) { - ORTE_ERROR_LOG(rc); - return rc; - } - if (ORTE_SUCCESS != (rc = orte_ns.create_process_name(&(orte_process_info.my_name), - 0, jobid, vpid))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - return ORTE_SUCCESS; -} diff --git a/orte/mca/ns/replica/ns_replica.h b/orte/mca/ns/replica/ns_replica.h index 5291813293..984a5ca011 100644 --- a/orte/mca/ns/replica/ns_replica.h +++ b/orte/mca/ns/replica/ns_replica.h @@ -39,31 +39,6 @@ extern "C" { */ #define NS_REPLICA_MAX_STRING_SIZE 256 - -/* class for tracking cellid's */ -struct orte_ns_replica_cell_tracker_t { - opal_object_t super; - orte_cellid_t cell; - char *site; - char *resource; - orte_nodeid_t next_nodeid; - orte_pointer_array_t *nodeids; -}; -typedef struct orte_ns_replica_cell_tracker_t orte_ns_replica_cell_tracker_t; - -OBJ_CLASS_DECLARATION(orte_ns_replica_cell_tracker_t); - -/* object for tracking nodeid's */ -struct 
orte_ns_replica_nodeid_tracker_t { - opal_object_t super; - char *nodename; - orte_nodeid_t nodeid; -}; -typedef struct orte_ns_replica_nodeid_tracker_t orte_ns_replica_nodeid_tracker_t; - -OBJ_CLASS_DECLARATION(orte_ns_replica_nodeid_tracker_t); - - /* * object for tracking vpids and jobids for job families * This structure is used to track the parent-child relationship between @@ -106,8 +81,8 @@ OBJ_CLASS_DECLARATION(orte_ns_replica_dti_t); */ typedef struct { size_t max_size, block_size; - orte_cellid_t num_cells; - orte_pointer_array_t *cells; + orte_nodeid_t next_nodeid; + orte_pointer_array_t *nodenames; orte_jobid_t num_jobids; opal_list_t jobs; orte_pointer_array_t *tags; @@ -143,17 +118,11 @@ void orte_ns_replica_recv(int status, orte_process_name_t* sender, orte_buffer_t* buffer, orte_rml_tag_t tag, void* cbdata); /* - * CELL FUNCTIONS + * NODE FUNCTIONS */ -int orte_ns_replica_create_cellid(orte_cellid_t *cellid, char *site, char *resource); +int orte_ns_replica_create_nodeids(orte_nodeid_t **nodeids, orte_std_cntr_t *nnodes, char **nodenames); -int orte_ns_replica_get_cell_info(orte_cellid_t cellid, - char **site, char **resource); - -int orte_ns_replica_create_nodeids(orte_nodeid_t **nodeids, orte_std_cntr_t *nnodes, - orte_cellid_t cellid, char **nodenames); - -int orte_ns_replica_get_node_info(char ***nodenames, orte_cellid_t cellid, orte_std_cntr_t num_nodes, orte_nodeid_t *nodeids); +int orte_ns_replica_get_node_info(char ***nodenames, orte_std_cntr_t num_nodes, orte_nodeid_t *nodeids); /* * JOB FUNCTIONS @@ -195,9 +164,6 @@ int orte_ns_replica_create_my_name(void); /* * DIAGNOSTIC FUNCTIONS */ -int orte_ns_replica_dump_cells(void); -int orte_ns_replica_dump_cells_fn(orte_buffer_t *buffer); - int orte_ns_replica_dump_jobs(void); int orte_ns_replica_dump_jobs_fn(orte_buffer_t *buffer); diff --git a/orte/mca/ns/replica/ns_replica_cell_fns.c b/orte/mca/ns/replica/ns_replica_cell_fns.c index 4bfdb346ef..dd6324cf04 100644 --- 
a/orte/mca/ns/replica/ns_replica_cell_fns.c +++ b/orte/mca/ns/replica/ns_replica_cell_fns.c @@ -38,141 +38,14 @@ * functions */ -int orte_ns_replica_create_cellid(orte_cellid_t *cellid, char *site, char *resource) -{ - orte_ns_replica_cell_tracker_t *new_cell, **cell; - int rc; - orte_std_cntr_t i, j, index; - - OPAL_TRACE(1); - - OPAL_THREAD_LOCK(&orte_ns_replica.mutex); - - /* if a valid cellid is given to us, then all we need to do is - * update the descriptive info - */ - if (ORTE_CELLID_INVALID != *cellid) { - /* see if the cell info is already present */ - cell = (orte_ns_replica_cell_tracker_t**)(orte_ns_replica.cells)->addr; - for (i=0, j=0; j < orte_ns_replica.num_cells && - i < (orte_ns_replica.cells)->size; i++) { - if (NULL != cell[i]) { - j++; - if (cell[i]->cell == *cellid) { - /* it is here - update the info */ - if (NULL != cell[i]->site) { - free(cell[i]->site); - } - if (NULL != cell[i]->resource) { - free(cell[i]->resource); - } - new_cell = cell[i]; - goto UPDATE; - } - } - } - /* get here if one isn't already present - create one */ - goto NEWSITE; - } - - /* check for error */ - if (NULL == site || NULL == resource) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - return ORTE_ERR_BAD_PARAM; - } - - /* is this a known cellid? 
*/ - cell = (orte_ns_replica_cell_tracker_t**)(orte_ns_replica.cells)->addr; - for (i=0, j=0; j < orte_ns_replica.num_cells && - i < (orte_ns_replica.cells)->size; i++) { - if (NULL != cell[i]) { - j++; - if (0 == strcmp(site, cell[i]->site) && - 0 == strcmp(resource, cell[i]->resource)) { - *cellid = cell[i]->cell; - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - return ORTE_SUCCESS; - } - } - } - - *cellid = orte_ns_replica.num_cells; - -NEWSITE: - /* new cell - check if cellid is available */ - if (ORTE_CELLID_MAX-1 < orte_ns_replica.num_cells) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - new_cell = OBJ_NEW(orte_ns_replica_cell_tracker_t); - if (NULL == new_cell) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - return ORTE_ERR_OUT_OF_RESOURCE; - } - if (ORTE_SUCCESS != (rc = orte_pointer_array_add(&index, orte_ns_replica.cells, new_cell))) { - ORTE_ERROR_LOG(rc); - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - return rc; - } - (orte_ns_replica.num_cells)++; - - new_cell->cell = *cellid; - -UPDATE: - new_cell->site = strdup(site); - new_cell->resource = strdup(resource); - - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - return ORTE_SUCCESS; -} - -int orte_ns_replica_get_cell_info(orte_cellid_t cellid, - char **site, char **resource) -{ - orte_std_cntr_t i; - orte_cellid_t j; - orte_ns_replica_cell_tracker_t **cell; - - OPAL_TRACE(1); - - OPAL_THREAD_LOCK(&orte_ns_replica.mutex); - - cell = (orte_ns_replica_cell_tracker_t**)(orte_ns_replica.cells)->addr; - for (i=0, j=0; j < orte_ns_replica.num_cells && - i < (orte_ns_replica.cells)->size; i++) { - if (NULL != cell[i]) { - j++; - if (cellid == cell[i]->cell) { - *site = strdup(cell[i]->site); - *resource = strdup(cell[i]->resource); - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - return ORTE_SUCCESS; - } - } - } - - /* it isn't an error to not find the cell - so do NOT - * report it 
via ORTE_ERROR_LOG - */ - - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - return ORTE_ERR_NOT_FOUND; -} - /* * NODEID */ -int orte_ns_replica_create_nodeids(orte_nodeid_t **nodeids, orte_std_cntr_t *nnodes, - orte_cellid_t cellid, char **nodenames) +int orte_ns_replica_create_nodeids(orte_nodeid_t **nodeids, orte_std_cntr_t *nnodes, char **nodenames) { - orte_ns_replica_cell_tracker_t **cell, *cptr; - orte_ns_replica_nodeid_tracker_t **nodes, *node; - orte_nodeid_t *nds, nid; - orte_std_cntr_t i, j, k, m, n, num_nodes; + orte_nodeid_t *nds, nid, m; + orte_std_cntr_t k, n, num_nodes; + char **nodes; OPAL_THREAD_LOCK(&orte_ns_replica.mutex); @@ -189,54 +62,20 @@ int orte_ns_replica_create_nodeids(orte_nodeid_t **nodeids, orte_std_cntr_t *nno return ORTE_ERR_OUT_OF_RESOURCE; } - /** find the cell */ - cell = (orte_ns_replica_cell_tracker_t**)(orte_ns_replica.cells)->addr; - for (i=0, j=0; j < orte_ns_replica.num_cells && - i < (orte_ns_replica.cells)->size; i++) { - if (NULL != cell[i]) { - j++; - if (cellid == cell[i]->cell) { - /** found the specified cell - check to see if nodename has already been - * defined. if so, just return the nodeid. 
if not, create a new one - */ - cptr = cell[i]; - goto PROCESS; - } - } - } - /** get here if we didn't find the cell */ - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - free(nds); - *nodeids = NULL; - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - return ORTE_ERR_NOT_FOUND; - -PROCESS: - nodes = (orte_ns_replica_nodeid_tracker_t**)(cptr->nodeids->addr); + nodes = (char**)(orte_ns_replica.nodenames->addr); for (n=0; n < num_nodes; n++) { - for (k=0, m=0; m < cptr->next_nodeid && - k < (cptr->nodeids)->size; k++) { + for (k=0, m=0; m < orte_ns_replica.next_nodeid && + k < (orte_ns_replica.nodenames)->size; k++) { if (NULL != nodes[k]) { m++; - if (strcmp(nodenames[n], nodes[k]->nodename) == 0) { /** found same name */ - nid = nodes[k]->nodeid; + if (strcmp(nodenames[n], nodes[k]) == 0) { /** found same name */ + nid = m; goto ASSIGN; } } } - /** get here if we don't find this nodename - add one */ - node = OBJ_NEW(orte_ns_replica_nodeid_tracker_t); - if (NULL == node) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - free(nds); - *nodeids = NULL; - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - return ORTE_ERR_OUT_OF_RESOURCE; - } - node->nodename = strdup(nodenames[n]); - node->nodeid = cptr->next_nodeid; - cptr->next_nodeid++; - nid = node->nodeid; + /** get here if we don't find this nodename - add it */ + nid = orte_ns_replica.next_nodeid++; ASSIGN: nds[n] = nid; @@ -249,16 +88,13 @@ ASSIGN: return ORTE_SUCCESS; } -int orte_ns_replica_get_node_info(char ***nodenames, orte_cellid_t cellid, - orte_std_cntr_t num_nodes, orte_nodeid_t *nodeids) +int orte_ns_replica_get_node_info(char ***nodenames, orte_std_cntr_t num_nodes, orte_nodeid_t *nodeids) { - char **names, *nm; - orte_ns_replica_cell_tracker_t **cell, *cptr; - orte_ns_replica_nodeid_tracker_t **nodes; - orte_std_cntr_t i, j, k, m, n; - char *err_name = "NODE_NOT_FOUND"; + char **names; + orte_std_cntr_t n; + char **nodes; - OPAL_THREAD_LOCK(&orte_ns_replica.mutex); + OPAL_THREAD_LOCK(&orte_ns_replica.mutex); if (0 == 
num_nodes) { *nodenames = NULL; @@ -273,48 +109,15 @@ int orte_ns_replica_get_node_info(char ***nodenames, orte_cellid_t cellid, } names[num_nodes] = NULL; /** NULL-terminate the list */ - /** find the cell */ - cell = (orte_ns_replica_cell_tracker_t**)(orte_ns_replica.cells)->addr; - for (i=0, j=0; j < orte_ns_replica.num_cells && - i < (orte_ns_replica.cells)->size; i++) { - if (NULL != cell[i]) { - j++; - if (cellid == cell[i]->cell) { - /** found the specified cell - check to see if nodename has already been - * defined. if so, just return the nodeid. if not, create a new one - */ - cptr = cell[i]; - goto PROCESS; - } - } - } - /** get here if we didn't find the cell */ - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - free(names); - *nodenames = NULL; - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - return ORTE_ERR_NOT_FOUND; - -PROCESS: - nodes = (orte_ns_replica_nodeid_tracker_t**)(cell[i]->nodeids->addr); + nodes = (char**)(orte_ns_replica.nodenames->addr); for (n=0; n < num_nodes; n++) { - for (k=0, m=0; m < cell[i]->next_nodeid && - k < (cell[i]->nodeids)->size; k++) { - if (NULL != nodes[k]) { - m++; - if (nodeids[n] == nodes[k]->nodeid) { /** found it */ - nm = nodes[k]->nodename; - goto ASSIGN; - } - } + if (nodeids[n] >= orte_ns_replica.next_nodeid) { + names[n] = strdup("invalid nodeid"); + } else if (NULL != nodes[nodeids[n]]) { + names[n] = strdup(nodes[nodeids[n]]); + } else { + names[n] = strdup("unknown nodeid"); } - /** node not found - set name to error name. 
Can't set it to NULL since - * the list is a NULL-terminated one - */ - nm = err_name; - -ASSIGN: - names[n] = strdup(nm); } *nodenames = names; diff --git a/orte/mca/ns/replica/ns_replica_class_instances.h b/orte/mca/ns/replica/ns_replica_class_instances.h index b2a5c49a5c..467925c4f5 100644 --- a/orte/mca/ns/replica/ns_replica_class_instances.h +++ b/orte/mca/ns/replica/ns_replica_class_instances.h @@ -34,67 +34,6 @@ extern "C" { #endif -/*** CELLID ***/ -/* constructor - used to initialize state of cell_tracker instance */ -static void orte_ns_replica_cell_tracker_construct(orte_ns_replica_cell_tracker_t* cell_tracker) -{ - cell_tracker->cell = ORTE_CELLID_INVALID; - cell_tracker->site = NULL; - cell_tracker->resource = NULL; - - cell_tracker->next_nodeid = 0; - orte_pointer_array_init(&(cell_tracker->nodeids), - orte_ns_replica.block_size, - orte_ns_replica.max_size, - orte_ns_replica.block_size); -} - -/* destructor - used to free any resources held by instance */ -static void orte_ns_replica_cell_tracker_destructor(orte_ns_replica_cell_tracker_t* cell_tracker) -{ - orte_std_cntr_t i, j; - orte_ns_replica_nodeid_tracker_t **nodeid; - - if (NULL != cell_tracker->site) free(cell_tracker->site); - if (NULL != cell_tracker->resource) free(cell_tracker->resource); - - nodeid = (orte_ns_replica_nodeid_tracker_t**)(cell_tracker->nodeids)->addr; - - for (i=0, j=0; j < cell_tracker->next_nodeid && - i < (cell_tracker->nodeids)->size; i++) { - if (NULL != nodeid[i]) { - j++; - OBJ_RELEASE(nodeid[i]); - } - } - OBJ_RELEASE(cell_tracker->nodeids); -} - -/* define instance of opal_class_t */ -OBJ_CLASS_INSTANCE(orte_ns_replica_cell_tracker_t, /* type name */ - opal_object_t, /* parent "class" name */ - orte_ns_replica_cell_tracker_construct, /* constructor */ - orte_ns_replica_cell_tracker_destructor); /* destructor */ - - -/** NODEID */ -static void orte_ns_replica_nodeid_tracker_construct(orte_ns_replica_nodeid_tracker_t *ptr) -{ - ptr->nodeid = ORTE_NODEID_INVALID; - 
ptr->nodename = NULL; -} - -static void orte_ns_replica_nodeid_tracker_destructor(orte_ns_replica_nodeid_tracker_t *ptr) -{ - if (NULL != ptr->nodename) free(ptr->nodename); -} - -OBJ_CLASS_INSTANCE(orte_ns_replica_nodeid_tracker_t, /* type name */ - opal_object_t, /* parent "class" name */ - orte_ns_replica_nodeid_tracker_construct, /* constructor */ - orte_ns_replica_nodeid_tracker_destructor); /* destructor */ - - /*** JOBITEM ***/ /* constructor - used to initialize state of jobitem instance */ static void orte_ns_replica_jobitem_construct(orte_ns_replica_jobitem_t *ptr) diff --git a/orte/mca/ns/replica/ns_replica_component.c b/orte/mca/ns/replica/ns_replica_component.c index 5cb29f10ed..fd02073ea8 100644 --- a/orte/mca/ns/replica/ns_replica_component.c +++ b/orte/mca/ns/replica/ns_replica_component.c @@ -77,12 +77,6 @@ orte_ns_replica_finalize /* module shutdown */ static mca_ns_base_module_t orte_ns_replica_module = { /* init */ orte_ns_replica_module_init, - /* cell functions */ - orte_ns_replica_create_cellid, - orte_ns_replica_get_cell_info, - orte_ns_base_get_cellid_string, - orte_ns_base_convert_cellid_to_string, - orte_ns_base_convert_string_to_cellid, /** node functions */ orte_ns_replica_create_nodeids, orte_ns_replica_get_node_info, @@ -117,7 +111,6 @@ static mca_ns_base_module_t orte_ns_replica_module = { /* data type functions */ orte_ns_replica_define_data_type, /* diagnostic functions */ - orte_ns_replica_dump_cells, orte_ns_replica_dump_jobs, orte_ns_replica_dump_tags, orte_ns_replica_dump_datatypes, @@ -195,15 +188,15 @@ mca_ns_base_module_t* orte_ns_replica_init(int *priority) *priority = 50; - /* initialize the cell info tracker */ - if (ORTE_SUCCESS != (rc = orte_pointer_array_init(&(orte_ns_replica.cells), + /* initialize the node tracker */ + if (ORTE_SUCCESS != (rc = orte_pointer_array_init(&(orte_ns_replica.nodenames), (orte_std_cntr_t)orte_ns_replica.block_size, (orte_std_cntr_t)orte_ns_replica.max_size, 
(orte_std_cntr_t)orte_ns_replica.block_size))) { ORTE_ERROR_LOG(rc); return NULL; } - orte_ns_replica.num_cells = 0; + orte_ns_replica.next_nodeid = 0; /* initialize the job tracking system */ OBJ_CONSTRUCT(&orte_ns_replica.jobs, opal_list_t); @@ -265,22 +258,25 @@ int orte_ns_replica_module_init(void) */ int orte_ns_replica_finalize(void) { - orte_ns_replica_cell_tracker_t **cptr; + char **cptr; opal_list_item_t *item; orte_ns_replica_tagitem_t **tag; orte_ns_replica_dti_t **dti; orte_std_cntr_t i; + orte_nodeid_t j; /* free all tracking storage, but only if this component was initialized */ if (initialized) { - cptr = (orte_ns_replica_cell_tracker_t**)(orte_ns_replica.cells)->addr; - for (i=0; i < (orte_ns_replica.cells)->size; i++) { + cptr = (char**)(orte_ns_replica.nodenames)->addr; + for (i=0, j=0; j < orte_ns_replica.next_nodeid && + i < (orte_ns_replica.nodenames)->size; i++) { if (NULL != cptr[i]) { - OBJ_RELEASE(cptr[i]); + j++; + free(cptr[i]); } } - OBJ_RELEASE(orte_ns_replica.cells); + OBJ_RELEASE(orte_ns_replica.nodenames); while (NULL != (item = opal_list_remove_first(&orte_ns_replica.jobs))) { OBJ_RELEASE(item); diff --git a/orte/mca/ns/replica/ns_replica_diag_fns.c b/orte/mca/ns/replica/ns_replica_diag_fns.c index 94bd9ff103..b6f9bdd9af 100644 --- a/orte/mca/ns/replica/ns_replica_diag_fns.c +++ b/orte/mca/ns/replica/ns_replica_diag_fns.c @@ -37,72 +37,6 @@ /* * DIAGNOSTIC functions */ -int orte_ns_replica_dump_cells(void) -{ - orte_buffer_t buffer; - int rc; - - OBJ_CONSTRUCT(&buffer, orte_buffer_t); - if (ORTE_SUCCESS != (rc = orte_ns_replica_dump_cells_fn(&buffer))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - if (ORTE_SUCCESS != (rc = orte_ns_base_print_dump(&buffer))) { - ORTE_ERROR_LOG(rc); - OBJ_DESTRUCT(&buffer); - return rc; - } - - OBJ_DESTRUCT(&buffer); - return ORTE_SUCCESS; -} - -int orte_ns_replica_dump_cells_fn(orte_buffer_t *buffer) -{ - orte_std_cntr_t i; - orte_cellid_t j; - orte_ns_replica_cell_tracker_t **cell; - char 
tmp_out[NS_REPLICA_MAX_STRING_SIZE], *tmp; - int rc; - - OPAL_THREAD_LOCK(&orte_ns_replica.mutex); - - tmp = tmp_out; - snprintf(tmp, NS_REPLICA_MAX_STRING_SIZE, "Dump of Name Service Cell Tracker\n"); - if (ORTE_SUCCESS != (rc = orte_dss.pack(buffer, &tmp, 1, ORTE_STRING))) { - ORTE_ERROR_LOG(rc); - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - return rc; - } - cell = (orte_ns_replica_cell_tracker_t**)(orte_ns_replica.cells)->addr; - for (i=0, j=0; j < orte_ns_replica.num_cells && - i < (orte_ns_replica.cells)->size; i++) { - if (NULL != cell[i]) { - j++; - snprintf(tmp, NS_REPLICA_MAX_STRING_SIZE, "Num: %lu\tCell: %lu\n", - (unsigned long)j, (unsigned long)cell[i]->cell); - if (ORTE_SUCCESS != (rc = orte_dss.pack(buffer, &tmp, 1, ORTE_STRING))) { - ORTE_ERROR_LOG(rc); - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - return rc; - } - snprintf(tmp, NS_REPLICA_MAX_STRING_SIZE, "\tSite: %s\n\tResource: %s\n", - cell[i]->site, cell[i]->resource); - if (ORTE_SUCCESS != (rc = orte_dss.pack(buffer, &tmp, 1, ORTE_STRING))) { - ORTE_ERROR_LOG(rc); - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - return rc; - } - } - } - - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - - return ORTE_SUCCESS; -} - - int orte_ns_replica_dump_jobs(void) { orte_buffer_t buffer; diff --git a/orte/mca/ns/replica/ns_replica_general_fns.c b/orte/mca/ns/replica/ns_replica_general_fns.c index adbe4cb8b6..a155499293 100644 --- a/orte/mca/ns/replica/ns_replica_general_fns.c +++ b/orte/mca/ns/replica/ns_replica_general_fns.c @@ -41,7 +41,6 @@ int orte_ns_replica_get_peers(orte_process_name_t **procs, { orte_std_cntr_t i, isave, npeers; orte_jobid_t *jptr; - orte_cellid_t *cptr; orte_attribute_t *attr; orte_ns_replica_jobitem_t *job_info, *child; opal_list_item_t *item; @@ -56,31 +55,12 @@ int orte_ns_replica_get_peers(orte_process_name_t **procs, *procs = NULL; *num_procs = 0; - /* check the attributes to see if USE_JOB or USE_CELL has been set. 
If not, then this is + /* check the attributes to see if USE_JOB has been set. If not, then this is * a request for my own job peers - process that one locally */ - /* if the cell is given AND it matches my own, then we can process this - * quickly. Otherwise, we have to do some more work. - * - * RHC: when we go multi-cell, we need a way to find all the cells upon - * which a job is executing so we can make this work! - */ - if (NULL != (attr = orte_rmgr.find_attribute(attrs, ORTE_NS_USE_CELL))) { - if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&cptr, attr->value, ORTE_CELLID))) { - ORTE_ERROR_LOG(rc); - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - return rc; - } - if (*cptr != ORTE_PROC_MY_NAME->cellid && *cptr != ORTE_CELLID_WILDCARD) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_IMPLEMENTED); - OPAL_THREAD_UNLOCK(&orte_ns_replica.mutex); - return ORTE_ERR_NOT_IMPLEMENTED; - } - } - if (NULL == (attr = orte_rmgr.find_attribute(attrs, ORTE_NS_USE_JOBID))) { - /* get my own job peers, assuming all are on this cell */ + /* get my own job peers */ *procs = (orte_process_name_t*)malloc(orte_process_info.num_procs * sizeof(orte_process_name_t)); if (NULL == *procs) { ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); @@ -89,7 +69,6 @@ int orte_ns_replica_get_peers(orte_process_name_t **procs, } for (i=0; i < orte_process_info.num_procs; i++) { - (*procs)[i].cellid = ORTE_PROC_MY_NAME->cellid; (*procs)[i].jobid = ORTE_PROC_MY_NAME->jobid; (*procs)[i].vpid = orte_process_info.vpid_start + i; } @@ -151,7 +130,6 @@ int orte_ns_replica_get_peers(orte_process_name_t **procs, while (NULL != (item = opal_list_remove_first(&peerlist))) { child = (orte_ns_replica_jobitem_t*)item; for (i=0; i < child->next_vpid; i++) { - (*procs)[i+isave].cellid = ORTE_PROC_MY_NAME->cellid; (*procs)[i+isave].jobid = child->jobid; (*procs)[i+isave].vpid = i; } @@ -186,7 +164,6 @@ int orte_ns_replica_get_peers(orte_process_name_t **procs, /* populate it, starting with the specified job followed by its children */ for 
(i=0; i < job_info->next_vpid; i++) { - (*procs)[i].cellid = ORTE_PROC_MY_NAME->cellid; (*procs)[i].jobid = *jptr; (*procs)[i].vpid = i; } @@ -196,7 +173,6 @@ int orte_ns_replica_get_peers(orte_process_name_t **procs, item = opal_list_get_next(item)) { child = (orte_ns_replica_jobitem_t*)item; for (i=0; i < child->next_vpid; i++) { - (*procs)[i+isave].cellid = ORTE_PROC_MY_NAME->cellid; (*procs)[i+isave].jobid = child->jobid; (*procs)[i+isave].vpid = i; } @@ -220,7 +196,6 @@ int orte_ns_replica_get_peers(orte_process_name_t **procs, } for (i=0; i < job_info->next_vpid; i++) { - (*procs)[i].cellid = ORTE_PROC_MY_NAME->cellid; (*procs)[i].jobid = *jptr; (*procs)[i].vpid = i; } @@ -392,8 +367,7 @@ int orte_ns_replica_create_my_name(void) return rc; } - if (ORTE_SUCCESS != (rc = orte_ns.create_process_name(&(orte_process_info.my_name), - 0, jobid, vpid))) { + if (ORTE_SUCCESS != (rc = orte_ns.create_process_name(&(orte_process_info.my_name), jobid, vpid))) { ORTE_ERROR_LOG(rc); return rc; } diff --git a/orte/mca/ns/replica/ns_replica_recv.c b/orte/mca/ns/replica/ns_replica_recv.c index 06c08fe471..6ce799141f 100644 --- a/orte/mca/ns/replica/ns_replica_recv.c +++ b/orte/mca/ns/replica/ns_replica_recv.c @@ -58,15 +58,14 @@ void orte_ns_replica_recv(int status, orte_process_name_t* sender, orte_buffer_t answer, error_answer; orte_ns_cmd_flag_t command; opal_list_t attrs; - orte_cellid_t cell; orte_jobid_t job, root, *descendants; orte_vpid_t startvpid, range; - char *tagname, *site, *resource; + char *tagname; orte_rml_tag_t oob_tag; orte_data_type_t type; orte_std_cntr_t count, nprocs, nret; orte_process_name_t *procs; - int rc=ORTE_SUCCESS, ret; + int rc=ORTE_SUCCESS; count = 1; if (ORTE_SUCCESS != (rc = orte_dss.unpack(buffer, &command, &count, ORTE_NS_CMD))) { @@ -82,69 +81,6 @@ void orte_ns_replica_recv(int status, orte_process_name_t* sender, } switch (command) { - case ORTE_NS_CREATE_CELLID_CMD: - count = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(buffer, 
&cell, &count, ORTE_CELLID))) { - ORTE_ERROR_LOG(rc); - rc = ORTE_ERR_BAD_PARAM; - goto RETURN_ERROR; - } - - count = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(buffer, &site, &count, ORTE_STRING))) { - ORTE_ERROR_LOG(rc); - rc = ORTE_ERR_BAD_PARAM; - goto RETURN_ERROR; - } - - count = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(buffer, &resource, &count, ORTE_STRING))) { - ORTE_ERROR_LOG(rc); - rc = ORTE_ERR_BAD_PARAM; - goto RETURN_ERROR; - } - - rc = orte_ns_replica_create_cellid(&cell, site, resource); - - if (ORTE_SUCCESS != (ret = orte_dss.pack(&answer, &cell, 1, ORTE_CELLID))) { - ORTE_ERROR_LOG(ret); - goto RETURN_ERROR; - } - - if (0 > orte_rml.send_buffer(sender, &answer, tag, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - goto RETURN_ERROR; - } - break; - - case ORTE_NS_GET_CELL_INFO_CMD: - count = 1; - if (ORTE_SUCCESS != (rc = orte_dss.unpack(buffer, &cell, &count, ORTE_CELLID))) { - ORTE_ERROR_LOG(rc); - rc = ORTE_ERR_BAD_PARAM; - goto RETURN_ERROR; - } - - site = NULL; - resource = NULL; - rc = orte_ns_replica_get_cell_info(cell, &site, &resource); - - if (ORTE_SUCCESS != (ret = orte_dss.pack(&answer, &site, 1, ORTE_STRING))) { - ORTE_ERROR_LOG(ret); - goto RETURN_ERROR; - } - - if (ORTE_SUCCESS != (ret = orte_dss.pack(&answer, &resource, 1, ORTE_STRING))) { - ORTE_ERROR_LOG(ret); - goto RETURN_ERROR; - } - - if (0 > orte_rml.send_buffer(sender, &answer, tag, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - goto RETURN_ERROR; - } - break; - case ORTE_NS_CREATE_NODEID_CMD: case ORTE_NS_GET_NODE_INFO_CMD: ORTE_ERROR_LOG(ORTE_ERR_NOT_IMPLEMENTED); @@ -450,17 +386,6 @@ void orte_ns_replica_recv(int status, orte_process_name_t* sender, } break; - case ORTE_NS_DUMP_CELLS_CMD: - if (ORTE_SUCCESS != (rc = orte_ns_replica_dump_cells_fn(&answer))) { - ORTE_ERROR_LOG(rc); - goto RETURN_ERROR; - } - if (0 > orte_rml.send_buffer(sender, &answer, tag, 0)) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - goto RETURN_ERROR; - } - break; - case 
ORTE_NS_DUMP_JOBIDS_CMD: if (ORTE_SUCCESS != (rc = orte_ns_replica_dump_jobs_fn(&answer))) { ORTE_ERROR_LOG(rc); diff --git a/orte/mca/odls/bproc/odls_bproc.c b/orte/mca/odls/bproc/odls_bproc.c index e69ef30e8b..3172210779 100644 --- a/orte/mca/odls/bproc/odls_bproc.c +++ b/orte/mca/odls/bproc/odls_bproc.c @@ -677,8 +677,8 @@ orte_odls_bproc_launch_local_procs(orte_gpr_notify_data_t *data,) child = (odls_bproc_child_t *) item; if(0 < mca_odls_bproc_component.debug) { opal_output(0, "orte_odls_bproc_launch: setting up io for " - "[%ld,%ld,%ld] proc rank %ld\n", - ORTE_NAME_ARGS((child->name)), + "%s proc rank %ld\n", + ORTE_NAME_PRINT((child->name)), (long)child->name->vpid); } /* only setup to forward stdin if it is rank 0, otherwise connect diff --git a/orte/mca/odls/default/odls_default_module.c b/orte/mca/odls/default/odls_default_module.c index b89299ba2d..71838b7307 100644 --- a/orte/mca/odls/default/odls_default_module.c +++ b/orte/mca/odls/default/odls_default_module.c @@ -530,8 +530,8 @@ int orte_odls_default_kill_local_procs(orte_jobid_t job, bool set_state) OBJ_CONSTRUCT(&procs_killed, opal_list_t); - opal_output(orte_odls_globals.output, "[%ld,%ld,%ld] odls_kill_local_proc: working on job %ld", - ORTE_NAME_ARGS(ORTE_PROC_MY_NAME), (long)job); + opal_output(orte_odls_globals.output, "%s odls_kill_local_proc: working on job %ld", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (long)job); /* since we are going to be working with the global list of * children, we need to protect that list from modification @@ -547,8 +547,8 @@ int orte_odls_default_kill_local_procs(orte_jobid_t job, bool set_state) /* preserve the pointer to the next item in list in case we release it */ next = opal_list_get_next(item); - opal_output(orte_odls_globals.output, "[%ld,%ld,%ld] odls_kill_local_proc: checking child process [%ld,%ld,%ld]", - ORTE_NAME_ARGS(ORTE_PROC_MY_NAME), ORTE_NAME_ARGS(child->name)); + opal_output(orte_odls_globals.output, "%s odls_kill_local_proc: checking child 
process %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(child->name)); /* do we have a child from the specified job? Because the * job could be given as a WILDCARD value, we must use @@ -565,8 +565,8 @@ int orte_odls_default_kill_local_procs(orte_jobid_t job, bool set_state) * to do to it */ if (!child->alive) { - opal_output(orte_odls_globals.output, "[%ld,%ld,%ld] odls_kill_local_proc: child [%ld,%ld,%ld] is not alive", - ORTE_NAME_ARGS(ORTE_PROC_MY_NAME), ORTE_NAME_ARGS(child->name)); + opal_output(orte_odls_globals.output, "%s odls_kill_local_proc: child %s is not alive", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(child->name)); /* ensure, though, that the state is terminated so we don't lockup if * the proc never started */ @@ -702,8 +702,8 @@ GOTCHILD: exception is detected and handled (in which case this unpublish request will be ignored/discarded. */ opal_output(orte_odls_globals.output, - "odls: pid %ld corresponds to [%lu,%lu,%lu]\n", - (long) pid, ORTE_NAME_ARGS(child->name)); + "odls: pid %ld corresponds to %s\n", + (long) pid, ORTE_NAME_PRINT(child->name)); if (0 == child->name->vpid) { rc = orte_iof.iof_unpublish(child->name, ORTE_NS_CMP_ALL, ORTE_IOF_STDIN); @@ -751,20 +751,20 @@ GOTCHILD: /* the abort file must exist - there is nothing in it we need. It's * meer existence indicates that an abnormal termination occurred */ - opal_output(orte_odls_globals.output, "odls: child [%ld,%ld,%ld] died by abort", - ORTE_NAME_ARGS(child->name)); + opal_output(orte_odls_globals.output, "odls: child %s died by abort", + ORTE_NAME_PRINT(child->name)); aborted = true; free(abort_file); } else { - opal_output(orte_odls_globals.output, "odls: child process [%ld,%ld,%ld] terminated normally", - ORTE_NAME_ARGS(child->name)); + opal_output(orte_odls_globals.output, "odls: child process %s terminated normally", + ORTE_NAME_PRINT(child->name)); } } else { /* the process was terminated with a signal! 
That's definitely * abnormal, so indicate that condition */ - opal_output(orte_odls_globals.output, "odls: child process [%ld,%ld,%ld] terminated with signal", - ORTE_NAME_ARGS(child->name)); + opal_output(orte_odls_globals.output, "odls: child process %s terminated with signal", + ORTE_NAME_PRINT(child->name)); aborted = true; } @@ -1419,7 +1419,6 @@ int orte_odls_default_launch_local_procs(orte_gpr_notify_data_t *data) filem_request = OBJ_NEW(orte_filem_base_request_t); filem_request->num_procs = 1; filem_request->proc_name = (orte_process_name_t*)malloc(sizeof(orte_process_name_t) * filem_request->num_procs); - filem_request->proc_name[0].cellid = orte_process_info.gpr_replica->cellid; filem_request->proc_name[0].jobid = orte_process_info.gpr_replica->jobid; filem_request->proc_name[0].vpid = orte_process_info.gpr_replica->vpid; if(app_item->app_context->preload_binary) { @@ -1518,8 +1517,8 @@ int orte_odls_default_launch_local_procs(orte_gpr_notify_data_t *data) * If it has been launched, then do nothing */ if (child->alive) { - opal_output(orte_odls_globals.output, "odls: child [%ld,%ld,%ld] is already alive", - ORTE_NAME_ARGS(child->name)); + opal_output(orte_odls_globals.output, "odls: child %s is already alive", + ORTE_NAME_PRINT(child->name)); continue; } @@ -1528,13 +1527,13 @@ int orte_odls_default_launch_local_procs(orte_gpr_notify_data_t *data) * the dss.compare function to check for equality. 
*/ if (ORTE_EQUAL != orte_dss.compare(&job, &(child->name->jobid), ORTE_JOBID)) { - opal_output(orte_odls_globals.output, "odls: child [%ld,%ld,%ld] is not in job %ld being launched", - ORTE_NAME_ARGS(child->name), (long)job); + opal_output(orte_odls_globals.output, "odls: child %s is not in job %ld being launched", + ORTE_NAME_PRINT(child->name), (long)job); continue; } - opal_output(orte_odls_globals.output, "odls: preparing to launch child [%ld, %ld, %ld]", - ORTE_NAME_ARGS(child->name)); + opal_output(orte_odls_globals.output, "odls: preparing to launch child %s", + ORTE_NAME_PRINT(child->name)); /* find the indicated app_context in the list */ for (item2 = opal_list_get_first(&app_context_list); @@ -1711,8 +1710,8 @@ int orte_odls_default_deliver_message(orte_jobid_t job, orte_buffer_t *buffer, o if (ORTE_EQUAL != orte_dss.compare(&job, &(child->name->jobid), ORTE_JOBID)) { continue; } - opal_output(orte_odls_globals.output, "odls: sending message to tag %lu on child [%ld, %ld, %ld]", - (unsigned long)tag, ORTE_NAME_ARGS(child->name)); + opal_output(orte_odls_globals.output, "odls: sending message to tag %lu on child %s", + (unsigned long)tag, ORTE_NAME_PRINT(child->name)); /* if so, send the message */ rc = orte_rml.send_buffer(child->name, buffer, tag, 0); diff --git a/orte/mca/odls/process/odls_process_module.c b/orte/mca/odls/process/odls_process_module.c index 29845ac127..f57f62711a 100755 --- a/orte/mca/odls/process/odls_process_module.c +++ b/orte/mca/odls/process/odls_process_module.c @@ -248,8 +248,8 @@ static int orte_odls_process_kill_local_procs(orte_jobid_t job, bool set_state) OBJ_CONSTRUCT(&procs_killed, opal_list_t); - opal_output(orte_odls_globals.output, "[%ld,%ld,%ld] odls_kill_local_proc: working on job %ld", - ORTE_NAME_ARGS(ORTE_PROC_MY_NAME), (long)job); + opal_output(orte_odls_globals.output, "%s odls_kill_local_proc: working on job %ld", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (long)job); /* since we are going to be working with the 
global list of * children, we need to protect that list from modification @@ -265,8 +265,8 @@ static int orte_odls_process_kill_local_procs(orte_jobid_t job, bool set_state) /* preserve the pointer to the next item in list in case we release it */ next = opal_list_get_next(item); - opal_output(orte_odls_globals.output, "[%ld,%ld,%ld] odls_kill_local_proc: checking child process [%ld,%ld,%ld]", - ORTE_NAME_ARGS(ORTE_PROC_MY_NAME), ORTE_NAME_ARGS(child->name)); + opal_output(orte_odls_globals.output, "%s odls_kill_local_proc: checking child process %s", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(child->name)); /* do we have a child from the specified job? Because the * job could be given as a WILDCARD value, we must use @@ -283,8 +283,8 @@ static int orte_odls_process_kill_local_procs(orte_jobid_t job, bool set_state) * to do to it */ if (!child->alive) { - opal_output(orte_odls_globals.output, "[%ld,%ld,%ld] odls_kill_local_proc: child [%ld,%ld,%ld] is not alive", - ORTE_NAME_ARGS(ORTE_PROC_MY_NAME), ORTE_NAME_ARGS(child->name)); + opal_output(orte_odls_globals.output, "%s odls_kill_local_proc: child %s is not alive", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(child->name)); /* ensure, though, that the state is terminated so we don't lockup if * the proc never started */ @@ -418,8 +418,8 @@ GOTCHILD: exception is detected and handled (in which case this unpublish request will be ignored/discarded. */ opal_output(orte_odls_globals.output, - "odls: pid %ld corresponds to [%lu,%lu,%lu]\n", - (long) pid, ORTE_NAME_ARGS(child->name)); + "odls: pid %ld corresponds to %s\n", + (long) pid, ORTE_NAME_PRINT(child->name)); #if 0 if (0 == child->name->vpid) { rc = orte_iof.iof_unpublish(child->name, ORTE_NS_CMP_ALL, @@ -461,20 +461,20 @@ GOTCHILD: /* the abort file must exist - there is nothing in it we need. 
It's * meer existence indicates that an abnormal termination occurred */ - opal_output(orte_odls_globals.output, "odls: child [%ld,%ld,%ld] died by abort", - ORTE_NAME_ARGS(child->name)); + opal_output(orte_odls_globals.output, "odls: child %s died by abort", + ORTE_NAME_PRINT(child->name)); aborted = true; free(abort_file); } else { - opal_output(orte_odls_globals.output, "odls: child process [%ld,%ld,%ld] terminated normally", - ORTE_NAME_ARGS(child->name)); + opal_output(orte_odls_globals.output, "odls: child process %s terminated normally", + ORTE_NAME_PRINT(child->name)); } } else { /* the process was terminated with a signal! That's definitely * abnormal, so indicate that condition */ - opal_output(orte_odls_globals.output, "odls: child process [%ld,%ld,%ld] terminated with signal", - ORTE_NAME_ARGS(child->name)); + opal_output(orte_odls_globals.output, "odls: child process %s terminated with signal", + ORTE_NAME_PRINT(child->name)); aborted = true; } @@ -1007,7 +1007,6 @@ static int orte_odls_process_launch_local_procs(orte_gpr_notify_data_t *data) filem_request = OBJ_NEW(orte_filem_base_request_t); filem_request->num_procs = 1; filem_request->proc_name = (orte_process_name_t*)malloc(sizeof(orte_process_name_t) * filem_request->num_procs); - filem_request->proc_name[0].cellid = orte_process_info.gpr_replica->cellid; filem_request->proc_name[0].jobid = orte_process_info.gpr_replica->jobid; filem_request->proc_name[0].vpid = orte_process_info.gpr_replica->vpid; if(app_item->app_context->preload_binary) { @@ -1106,8 +1105,8 @@ static int orte_odls_process_launch_local_procs(orte_gpr_notify_data_t *data) * If it has been launched, then do nothing */ if (child->alive) { - opal_output(orte_odls_globals.output, "odls: child [%ld,%ld,%ld] is already alive", - ORTE_NAME_ARGS(child->name)); + opal_output(orte_odls_globals.output, "odls: child %s is already alive", + ORTE_NAME_PRINT(child->name)); continue; } @@ -1116,13 +1115,13 @@ static int 
orte_odls_process_launch_local_procs(orte_gpr_notify_data_t *data) * the dss.compare function to check for equality. */ if (ORTE_EQUAL != orte_dss.compare(&job, &(child->name->jobid), ORTE_JOBID)) { - opal_output(orte_odls_globals.output, "odls: child [%ld,%ld,%ld] is not in job %ld being launched", - ORTE_NAME_ARGS(child->name), (long)job); + opal_output(orte_odls_globals.output, "odls: child %s is not in job %ld being launched", + ORTE_NAME_PRINT(child->name), (long)job); continue; } - opal_output(orte_odls_globals.output, "odls: preparing to launch child [%ld, %ld, %ld]", - ORTE_NAME_ARGS(child->name)); + opal_output(orte_odls_globals.output, "odls: preparing to launch child %s", + ORTE_NAME_PRINT(child->name)); /* find the indicated app_context in the list */ for (item2 = opal_list_get_first(&app_context_list); @@ -1273,8 +1272,8 @@ int orte_odls_process_deliver_message(orte_jobid_t job, orte_buffer_t *buffer, o if (ORTE_EQUAL != orte_dss.compare(&job, &(child->name->jobid), ORTE_JOBID)) { continue; } - opal_output(orte_odls_globals.output, "odls: sending message to tag %lu on child [%ld, %ld, %ld]", - (unsigned long)tag, ORTE_NAME_ARGS(child->name)); + opal_output(orte_odls_globals.output, "odls: sending message to tag %lu on child %s", + (unsigned long)tag, ORTE_NAME_PRINT(child->name)); /* if so, send the message */ rc = orte_rml.send_buffer(child->name, buffer, tag, 0); diff --git a/orte/mca/oob/tcp/oob_tcp.c b/orte/mca/oob/tcp/oob_tcp.c index b139c76c8b..6a0ef044f6 100644 --- a/orte/mca/oob/tcp/oob_tcp.c +++ b/orte/mca/oob/tcp/oob_tcp.c @@ -440,8 +440,8 @@ static void mca_oob_tcp_accept(int incoming_sd) /* log the accept */ if (mca_oob_tcp_component.tcp_debug >= OOB_TCP_DEBUG_CONNECT) { - opal_output(0, "[%lu,%lu,%lu] mca_oob_tcp_accept: %s:%d\n", - ORTE_NAME_ARGS(orte_process_info.my_name), + opal_output(0, "%s mca_oob_tcp_accept: %s:%d\n", + ORTE_NAME_PRINT(orte_process_info.my_name), opal_net_get_hostname((struct sockaddr*) &addr), 
opal_net_get_port((struct sockaddr*) &addr)); } @@ -632,8 +632,8 @@ static void* mca_oob_tcp_listen_thread(opal_object_t *obj) } if (mca_oob_tcp_component.tcp_debug >= OOB_TCP_DEBUG_CONNECT) { - opal_output(0, "[%lu,%lu,%lu] mca_oob_tcp_listen_thread: (%d, %d) %s:%d\n", - ORTE_NAME_ARGS(orte_process_info.my_name), + opal_output(0, "%s mca_oob_tcp_listen_thread: (%d, %d) %s:%d\n", + ORTE_NAME_PRINT(orte_process_info.my_name), item->fd, opal_socket_errno, inet_ntoa(item->addr.sin_addr), item->addr.sin_port); @@ -703,8 +703,8 @@ static int mca_oob_tcp_listen_progress(void) /* log the accept */ if (mca_oob_tcp_component.tcp_debug >= OOB_TCP_DEBUG_CONNECT) { - opal_output(0, "[%lu,%lu,%lu] mca_oob_tcp_listen_progress: %s:%d\n", - ORTE_NAME_ARGS(orte_process_info.my_name), + opal_output(0, "%s mca_oob_tcp_listen_progress: %s:%d\n", + ORTE_NAME_PRINT(orte_process_info.my_name), inet_ntoa(item->addr.sin_addr), item->addr.sin_port); } @@ -811,9 +811,9 @@ static void mca_oob_tcp_recv_probe(int sd, mca_oob_tcp_hdr_t* hdr) int retval = send(sd, (char *)ptr+cnt, sizeof(mca_oob_tcp_hdr_t)-cnt, 0); if(retval < 0) { if(opal_socket_errno != EINTR && opal_socket_errno != EAGAIN && opal_socket_errno != EWOULDBLOCK) { - opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_peer_recv_probe: send() failed: %s (%d)\n", - ORTE_NAME_ARGS(orte_process_info.my_name), - ORTE_NAME_ARGS(&(hdr->msg_src)), + opal_output(0, "%s-%s mca_oob_tcp_peer_recv_probe: send() failed: %s (%d)\n", + ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(&(hdr->msg_src)), strerror(opal_socket_errno), opal_socket_errno); CLOSE_THE_SOCKET(sd); @@ -837,13 +837,13 @@ static void mca_oob_tcp_recv_connect(int sd, mca_oob_tcp_hdr_t* hdr) /* now set socket up to be non-blocking */ if((flags = fcntl(sd, F_GETFL, 0)) < 0) { - opal_output(0, "[%lu,%lu,%lu] mca_oob_tcp_recv_handler: fcntl(F_GETFL) failed: %s (%d)", - ORTE_NAME_ARGS(orte_process_info.my_name), strerror(opal_socket_errno), opal_socket_errno); + 
opal_output(0, "%s mca_oob_tcp_recv_handler: fcntl(F_GETFL) failed: %s (%d)", + ORTE_NAME_PRINT(orte_process_info.my_name), strerror(opal_socket_errno), opal_socket_errno); } else { flags |= O_NONBLOCK; if(fcntl(sd, F_SETFL, flags) < 0) { - opal_output(0, "[%lu,%lu,%lu] mca_oob_tcp_recv_handler: fcntl(F_SETFL) failed: %s (%d)", - ORTE_NAME_ARGS(orte_process_info.my_name), strerror(opal_socket_errno), opal_socket_errno); + opal_output(0, "%s mca_oob_tcp_recv_handler: fcntl(F_SETFL) failed: %s (%d)", + ORTE_NAME_PRINT(orte_process_info.my_name), strerror(opal_socket_errno), opal_socket_errno); } } @@ -858,25 +858,24 @@ static void mca_oob_tcp_recv_connect(int sd, mca_oob_tcp_hdr_t* hdr) if (ORTE_SUCCESS != orte_ns.reserve_range(hdr->msg_src.jobid, 1, &hdr->msg_src.vpid)) { return; } - hdr->msg_src.cellid = ORTE_PROC_MY_NAME->cellid; } /* lookup the corresponding process */ peer = mca_oob_tcp_peer_lookup(&hdr->msg_src); if(NULL == peer) { - opal_output(0, "[%lu,%lu,%lu] mca_oob_tcp_recv_handler: unable to locate peer", - ORTE_NAME_ARGS(orte_process_info.my_name)); + opal_output(0, "%s mca_oob_tcp_recv_handler: unable to locate peer", + ORTE_NAME_PRINT(orte_process_info.my_name)); CLOSE_THE_SOCKET(sd); return; } /* is the peer instance willing to accept this connection */ if(mca_oob_tcp_peer_accept(peer, sd) == false) { if(mca_oob_tcp_component.tcp_debug >= OOB_TCP_DEBUG_CONNECT_FAIL) { - opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_recv_handler: " - "rejected connection from [%lu,%lu,%lu] connection state %d", - ORTE_NAME_ARGS(orte_process_info.my_name), - ORTE_NAME_ARGS(&(peer->peer_name)), - ORTE_NAME_ARGS(&(hdr->msg_src)), + opal_output(0, "%s-%s mca_oob_tcp_recv_handler: " + "rejected connection from %s connection state %d", + ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(&(peer->peer_name)), + ORTE_NAME_PRINT(&(hdr->msg_src)), peer->peer_state); } CLOSE_THE_SOCKET(sd); @@ -911,15 +910,15 @@ static void mca_oob_tcp_recv_handler(int sd, 
short flags, void* user) while((rc = recv(sd, (char *)&hdr, sizeof(hdr), 0)) != sizeof(hdr)) { if(rc >= 0) { if(mca_oob_tcp_component.tcp_debug >= OOB_TCP_DEBUG_CONNECT_FAIL) { - opal_output(0, "[%lu,%lu,%lu] mca_oob_tcp_recv_handler: peer closed connection", - ORTE_NAME_ARGS(orte_process_info.my_name)); + opal_output(0, "%s mca_oob_tcp_recv_handler: peer closed connection", + ORTE_NAME_PRINT(orte_process_info.my_name)); } CLOSE_THE_SOCKET(sd); return; } if(opal_socket_errno != EINTR) { - opal_output(0, "[%lu,%lu,%lu] mca_oob_tcp_recv_handler: recv() failed: %s (%d)\n", - ORTE_NAME_ARGS(orte_process_info.my_name), strerror(opal_socket_errno), opal_socket_errno); + opal_output(0, "%s mca_oob_tcp_recv_handler: recv() failed: %s (%d)\n", + ORTE_NAME_PRINT(orte_process_info.my_name), strerror(opal_socket_errno), opal_socket_errno); CLOSE_THE_SOCKET(sd); return; } @@ -935,8 +934,8 @@ static void mca_oob_tcp_recv_handler(int sd, short flags, void* user) mca_oob_tcp_recv_connect(sd, &hdr); break; default: - opal_output(0, "[%lu,%lu,%lu] mca_oob_tcp_recv_handler: invalid message type: %d\n", - ORTE_NAME_ARGS(orte_process_info.my_name), hdr.msg_type); + opal_output(0, "%s mca_oob_tcp_recv_handler: invalid message type: %d\n", + ORTE_NAME_PRINT(orte_process_info.my_name), hdr.msg_type); CLOSE_THE_SOCKET(sd); break; } @@ -1103,8 +1102,8 @@ int mca_oob_tcp_init(void) 16); /* increment to grow by */ opal_progress_register(mca_oob_tcp_listen_progress); if (mca_oob_tcp_component.tcp_debug >= OOB_TCP_DEBUG_INFO) { - opal_output(0, "[%lu,%lu,%lu] accepting connections via listen thread", - ORTE_NAME_ARGS(orte_process_info.my_name)); + opal_output(0, "%s accepting connections via listen thread", + ORTE_NAME_PRINT(orte_process_info.my_name)); } } else { /* fix up the listen_type, since we might have been in thread, @@ -1132,8 +1131,8 @@ int mca_oob_tcp_init(void) } #endif if (mca_oob_tcp_component.tcp_debug >= OOB_TCP_DEBUG_INFO) { - opal_output(0, "[%lu,%lu,%lu] accepting 
connections via event library", - ORTE_NAME_ARGS(orte_process_info.my_name)); + opal_output(0, "%s accepting connections via event library", + ORTE_NAME_PRINT(orte_process_info.my_name)); } } @@ -1457,9 +1456,9 @@ mca_oob_tcp_get_new_name(orte_process_name_t* name) size = 0; if(mca_oob_tcp_component.tcp_debug >= OOB_TCP_DEBUG_ALL) { - opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_get_new_name: starting\n", - ORTE_NAME_ARGS(orte_process_info.my_name), - ORTE_NAME_ARGS(&(peer->peer_name))); + opal_output(0, "%s-%s mca_oob_tcp_get_new_name: starting\n", + ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(&(peer->peer_name))); } /* turn the size to network byte order so there will be no problems */ @@ -1483,8 +1482,8 @@ mca_oob_tcp_get_new_name(orte_process_name_t* name) if (ORTE_SUCCESS == rc) { *name = *orte_process_info.my_name; if(mca_oob_tcp_component.tcp_debug >= OOB_TCP_DEBUG_ALL) { - opal_output(0, "[%lu,%lu,%lu] mca_oob_tcp_get_new_name: done\n", - ORTE_NAME_ARGS(orte_process_info.my_name)); + opal_output(0, "%s mca_oob_tcp_get_new_name: done\n", + ORTE_NAME_PRINT(orte_process_info.my_name)); } } diff --git a/orte/mca/oob/tcp/oob_tcp_msg.c b/orte/mca/oob/tcp/oob_tcp_msg.c index 1236b3d499..9372d3a2bb 100644 --- a/orte/mca/oob/tcp/oob_tcp_msg.c +++ b/orte/mca/oob/tcp/oob_tcp_msg.c @@ -267,9 +267,9 @@ bool mca_oob_tcp_msg_send_handler(mca_oob_tcp_msg_t* msg, struct mca_oob_tcp_pee else if (opal_socket_errno == EAGAIN || opal_socket_errno == EWOULDBLOCK) return false; else { - opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_msg_send_handler: writev failed: %s (%d)", - ORTE_NAME_ARGS(orte_process_info.my_name), - ORTE_NAME_ARGS(&(peer->peer_name)), + opal_output(0, "%s-%s mca_oob_tcp_msg_send_handler: writev failed: %s (%d)", + ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(&(peer->peer_name)), strerror(opal_socket_errno), opal_socket_errno); mca_oob_tcp_peer_close(peer); @@ -315,9 +315,9 @@ bool 
mca_oob_tcp_msg_recv_handler(mca_oob_tcp_msg_t* msg, struct mca_oob_tcp_pee if(msg->msg_hdr.msg_size > 0) { msg->msg_rwbuf = malloc(msg->msg_hdr.msg_size); if(NULL == msg->msg_rwbuf) { - opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_msg_recv_handler: malloc(%d) failed\n", - ORTE_NAME_ARGS(orte_process_info.my_name), - ORTE_NAME_ARGS(&(peer->peer_name)), + opal_output(0, "%s-%s mca_oob_tcp_msg_recv_handler: malloc(%d) failed\n", + ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(&(peer->peer_name)), msg->msg_hdr.msg_size); mca_oob_tcp_peer_close(peer); return false; @@ -331,9 +331,9 @@ bool mca_oob_tcp_msg_recv_handler(mca_oob_tcp_msg_t* msg, struct mca_oob_tcp_pee msg->msg_rwnum = 0; } if (mca_oob_tcp_component.tcp_debug >= OOB_TCP_DEBUG_INFO) { - opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_msg_recv_handler: size %lu\n", - ORTE_NAME_ARGS(orte_process_info.my_name), - ORTE_NAME_ARGS(&(peer->peer_name)), + opal_output(0, "%s-%s mca_oob_tcp_msg_recv_handler: size %lu\n", + ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(&(peer->peer_name)), (unsigned long)(msg->msg_hdr.msg_size) ); } } @@ -372,9 +372,9 @@ static bool mca_oob_tcp_msg_recv(mca_oob_tcp_msg_t* msg, mca_oob_tcp_peer_t* pee under UNIX/Linux environments */ else if (opal_socket_errno == EAGAIN || opal_socket_errno == EWOULDBLOCK) return false; - opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_msg_recv: readv failed: %s (%d)", - ORTE_NAME_ARGS(orte_process_info.my_name), - ORTE_NAME_ARGS(&(peer->peer_name)), + opal_output(0, "%s-%s mca_oob_tcp_msg_recv: readv failed: %s (%d)", + ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(&(peer->peer_name)), strerror(opal_socket_errno), opal_socket_errno); mca_oob_tcp_peer_close(peer); @@ -384,9 +384,9 @@ static bool mca_oob_tcp_msg_recv(mca_oob_tcp_msg_t* msg, mca_oob_tcp_peer_t* pee return false; } else if (rc == 0) { if(mca_oob_tcp_component.tcp_debug >= OOB_TCP_DEBUG_CONNECT_FAIL) { - 
opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_msg_recv: peer closed connection", - ORTE_NAME_ARGS(orte_process_info.my_name), - ORTE_NAME_ARGS(&(peer->peer_name))); + opal_output(0, "%s-%s mca_oob_tcp_msg_recv: peer closed connection", + ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(&(peer->peer_name))); } mca_oob_tcp_peer_close(peer); if (NULL != mca_oob_tcp.oob_exception_callback) { @@ -431,11 +431,9 @@ void mca_oob_tcp_msg_recv_complete(mca_oob_tcp_msg_t* msg, mca_oob_tcp_peer_t* p mca_oob_tcp_msg_data(msg,peer); break; default: - opal_output(0, "[%lu,%lu,%lu] mca_oob_tcp_msg_recv_complete: invalid message type: %d from peer [%lu,%lu,%lu]\n", - ORTE_NAME_ARGS(orte_process_info.my_name), msg->msg_hdr.msg_type, - (long)(peer->peer_name.cellid), - (long)(peer->peer_name.jobid), - (long)(peer->peer_name.vpid)); + opal_output(0, "%s mca_oob_tcp_msg_recv_complete: invalid message type: %d from peer %s\n", + ORTE_NAME_PRINT(orte_process_info.my_name), msg->msg_hdr.msg_type, + ORTE_NAME_PRINT(&peer->peer_name)); MCA_OOB_TCP_MSG_RETURN(msg); break; } diff --git a/orte/mca/oob/tcp/oob_tcp_peer.c b/orte/mca/oob/tcp/oob_tcp_peer.c index e821044c2d..4b730a4b5c 100644 --- a/orte/mca/oob/tcp/oob_tcp_peer.c +++ b/orte/mca/oob/tcp/oob_tcp_peer.c @@ -285,9 +285,9 @@ mca_oob_tcp_peer_create_socket(mca_oob_tcp_peer_t* peer, if (peer->peer_sd < 0) { opal_output(0, - "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_peer_create_socket: socket() failed: %s (%d)\n", - ORTE_NAME_ARGS(orte_process_info.my_name), - ORTE_NAME_ARGS(&(peer->peer_name)), + "%s-%s mca_oob_tcp_peer_create_socket: socket() failed: %s (%d)\n", + ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(&(peer->peer_name)), strerror(opal_socket_errno), opal_socket_errno); mca_oob_tcp_peer_shutdown(peer); @@ -303,17 +303,17 @@ mca_oob_tcp_peer_create_socket(mca_oob_tcp_peer_t* peer, /* setup the socket as non-blocking */ if (peer->peer_sd >= 0) { if((flags = fcntl(peer->peer_sd, F_GETFL, 0)) < 
0) { - opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_peer_connect: fcntl(F_GETFL) failed: %s (%d)\n", - ORTE_NAME_ARGS(orte_process_info.my_name), - ORTE_NAME_ARGS(&(peer->peer_name)), + opal_output(0, "%s-%s mca_oob_tcp_peer_connect: fcntl(F_GETFL) failed: %s (%d)\n", + ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(&(peer->peer_name)), strerror(opal_socket_errno), opal_socket_errno); } else { flags |= O_NONBLOCK; if(fcntl(peer->peer_sd, F_SETFL, flags) < 0) - opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_peer_connect: fcntl(F_SETFL) failed: %s (%d)\n", - ORTE_NAME_ARGS(orte_process_info.my_name), - ORTE_NAME_ARGS(&(peer->peer_name)), + opal_output(0, "%s-%s mca_oob_tcp_peer_connect: fcntl(F_SETFL) failed: %s (%d)\n", + ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(&(peer->peer_name)), strerror(opal_socket_errno), opal_socket_errno); } @@ -335,20 +335,20 @@ static int mca_oob_tcp_peer_try_connect(mca_oob_tcp_peer_t* peer) do { /* pick an address in round-robin fashion from the list exported by the peer */ if(ORTE_SUCCESS != (rc = mca_oob_tcp_addr_get_next(peer->peer_addr, (struct sockaddr*) &inaddr))) { - opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_peer_try_connect: " + opal_output(0, "%s-%s mca_oob_tcp_peer_try_connect: " "mca_oob_tcp_addr_get_next failed with error=%d", - ORTE_NAME_ARGS(orte_process_info.my_name), - ORTE_NAME_ARGS(&(peer->peer_name)), + ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(&(peer->peer_name)), rc); mca_oob_tcp_peer_close(peer); return ORTE_ERR_UNREACH; } if(mca_oob_tcp_component.tcp_debug >= OOB_TCP_DEBUG_CONNECT) { - opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_peer_try_connect: " + opal_output(0, "%s-%s mca_oob_tcp_peer_try_connect: " "connecting port %d to: %s:%d\n", - ORTE_NAME_ARGS(orte_process_info.my_name), - ORTE_NAME_ARGS(&(peer->peer_name)), + ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(&(peer->peer_name)), /* Bug, 
FIXME: output tcp6_listen_port for AF_INET6 */ ntohs(mca_oob_tcp_component.tcp_listen_port), opal_net_get_hostname((struct sockaddr*) &inaddr), @@ -375,10 +375,10 @@ static int mca_oob_tcp_peer_try_connect(mca_oob_tcp_peer_t* peer) return ORTE_SUCCESS; } - opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_peer_try_connect: " + opal_output(0, "%s-%s mca_oob_tcp_peer_try_connect: " "connect to %s:%d failed: %s (%d)", - ORTE_NAME_ARGS(orte_process_info.my_name), - ORTE_NAME_ARGS(&(peer->peer_name)), + ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(&(peer->peer_name)), opal_net_get_hostname((struct sockaddr*) &inaddr), opal_net_get_port((struct sockaddr*) &inaddr), strerror(opal_socket_errno), @@ -393,10 +393,10 @@ static int mca_oob_tcp_peer_try_connect(mca_oob_tcp_peer_t* peer) return ORTE_SUCCESS; } else { opal_output(0, - "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_peer_try_connect: " + "%s-%s mca_oob_tcp_peer_try_connect: " "mca_oob_tcp_peer_send_connect_ack to %s:%d failed: %s (%d)", - ORTE_NAME_ARGS(orte_process_info.my_name), - ORTE_NAME_ARGS(&(peer->peer_name)), + ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(&(peer->peer_name)), opal_net_get_hostname((struct sockaddr*) &inaddr), opal_net_get_port((struct sockaddr*) &inaddr), opal_strerror(rc), @@ -405,10 +405,10 @@ static int mca_oob_tcp_peer_try_connect(mca_oob_tcp_peer_t* peer) } while(peer->peer_addr->addr_next != 0); /* None of the interfaces worked.. 
*/ - opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_peer_try_connect: " + opal_output(0, "%s-%s mca_oob_tcp_peer_try_connect: " "connect to %s:%d failed, connecting over all interfaces failed!", - ORTE_NAME_ARGS(orte_process_info.my_name), - ORTE_NAME_ARGS(&(peer->peer_name)), + ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(&(peer->peer_name)), opal_net_get_hostname((struct sockaddr*) &inaddr), opal_net_get_port((struct sockaddr*) &inaddr)); mca_oob_tcp_peer_close(peer); @@ -453,9 +453,9 @@ static void mca_oob_tcp_peer_complete_connect(mca_oob_tcp_peer_t* peer, int sd) /* check connect completion status */ if(getsockopt(sd, SOL_SOCKET, SO_ERROR, (char *)&so_error, &so_length) < 0) { - opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_peer_complete_connect: getsockopt() failed: %s (%d)\n", - ORTE_NAME_ARGS(orte_process_info.my_name), - ORTE_NAME_ARGS(&(peer->peer_name)), + opal_output(0, "%s-%s mca_oob_tcp_peer_complete_connect: getsockopt() failed: %s (%d)\n", + ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(&(peer->peer_name)), strerror(opal_socket_errno), opal_socket_errno); mca_oob_tcp_peer_close(peer); @@ -467,17 +467,17 @@ static void mca_oob_tcp_peer_complete_connect(mca_oob_tcp_peer_t* peer, int sd) return; } else if (so_error == ECONNREFUSED || so_error == ETIMEDOUT) { struct timeval tv = { 1,0 }; - opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_peer_complete_connect: " + opal_output(0, "%s-%s mca_oob_tcp_peer_complete_connect: " "connection failed: %s (%d) - retrying\n", - ORTE_NAME_ARGS(orte_process_info.my_name), - ORTE_NAME_ARGS(&(peer->peer_name)), + ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(&(peer->peer_name)), strerror(so_error), so_error); if(mca_oob_tcp_component.tcp_debug >= OOB_TCP_DEBUG_CONNECT) { - opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_peer_complete_connect: " + opal_output(0, "%s-%s mca_oob_tcp_peer_complete_connect: " "sending ack, %d", - 
ORTE_NAME_ARGS(orte_process_info.my_name), - ORTE_NAME_ARGS(&(peer->peer_name)), so_error); + ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(&(peer->peer_name)), so_error); } mca_oob_tcp_peer_shutdown(peer); @@ -495,9 +495,9 @@ static void mca_oob_tcp_peer_complete_connect(mca_oob_tcp_peer_t* peer, int sd) peer->peer_state = MCA_OOB_TCP_CONNECT_ACK; opal_event_add(&peer->peer_recv_event, 0); } else { - opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_peer_complete_connect: unable to send connect ack.", - ORTE_NAME_ARGS(orte_process_info.my_name), - ORTE_NAME_ARGS(&(peer->peer_name))); + opal_output(0, "%s-%s mca_oob_tcp_peer_complete_connect: unable to send connect ack.", + ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(&(peer->peer_name))); mca_oob_tcp_peer_close(peer); } } @@ -528,9 +528,9 @@ static void mca_oob_tcp_peer_connected(mca_oob_tcp_peer_t* peer, int sd) void mca_oob_tcp_peer_close(mca_oob_tcp_peer_t* peer) { if(mca_oob_tcp_component.tcp_debug >= OOB_TCP_DEBUG_CONNECT) { - opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_peer_close(%p) sd %d state %d\n", - ORTE_NAME_ARGS(orte_process_info.my_name), - ORTE_NAME_ARGS(&(peer->peer_name)), + opal_output(0, "%s-%s mca_oob_tcp_peer_close(%p) sd %d state %d\n", + ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(&(peer->peer_name)), (void*)peer, peer->peer_sd, peer->peer_state); @@ -557,9 +557,9 @@ void mca_oob_tcp_peer_shutdown(mca_oob_tcp_peer_t* peer) if(peer->peer_retries++ > mca_oob_tcp_component.tcp_peer_retries) { mca_oob_tcp_msg_t *msg; - opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_peer_shutdown: retries exceeded", - ORTE_NAME_ARGS(orte_process_info.my_name), - ORTE_NAME_ARGS(&(peer->peer_name))); + opal_output(0, "%s-%s mca_oob_tcp_peer_shutdown: retries exceeded", + ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(&(peer->peer_name))); /* There are cases during the initial connection setup where the peer_send_msg is NULL 
but there are things in the queue @@ -638,10 +638,10 @@ static int mca_oob_tcp_peer_recv_connect_ack(mca_oob_tcp_peer_t* peer, int sd) struct timeval tv = { 1,0 }; if (mca_oob_tcp_component.tcp_debug >= OOB_TCP_DEBUG_CONNECT) { opal_output(0, - "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_peer_recv_connect_ack " + "%s-%s mca_oob_tcp_peer_recv_connect_ack " "connect failed during receive. Restarting (%s).", - ORTE_NAME_ARGS(orte_process_info.my_name), - ORTE_NAME_ARGS(&(peer->peer_name)), + ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(&(peer->peer_name)), strerror(opal_socket_errno)); } opal_event_del(&peer->peer_recv_event); @@ -663,11 +663,11 @@ static int mca_oob_tcp_peer_recv_connect_ack(mca_oob_tcp_peer_t* peer, int sd) /* compare the peers name to the expected value */ if (0 != orte_ns.compare_fields(ORTE_NS_CMP_ALL, &peer->peer_name, &hdr.msg_src)) { - opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_peer_recv_connect_ack: " - "received unexpected process identifier [%ld,%ld,%ld]\n", - ORTE_NAME_ARGS(orte_process_info.my_name), - ORTE_NAME_ARGS(&(peer->peer_name)), - ORTE_NAME_ARGS(&(hdr.msg_src))); + opal_output(0, "%s-%s mca_oob_tcp_peer_recv_connect_ack: " + "received unexpected process identifier %s\n", + ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(&(peer->peer_name)), + ORTE_NAME_PRINT(&(hdr.msg_src))); mca_oob_tcp_peer_close(peer); return ORTE_ERR_UNREACH; } @@ -678,7 +678,7 @@ static int mca_oob_tcp_peer_recv_connect_ack(mca_oob_tcp_peer_t* peer, int sd) */ if(orte_process_info.my_name == NULL) { orte_ns.create_process_name(&orte_process_info.my_name, - hdr.msg_dst.cellid, hdr.msg_dst.jobid, hdr.msg_dst.vpid); + hdr.msg_dst.jobid, hdr.msg_dst.vpid); } else if (orte_ns.compare_fields(ORTE_NS_CMP_ALL, orte_process_info.my_name, ORTE_NAME_INVALID) == ORTE_EQUAL) { *orte_process_info.my_name = hdr.msg_dst; } @@ -706,10 +706,10 @@ static int mca_oob_tcp_peer_recv_blocking(mca_oob_tcp_peer_t* peer, int sd, void /* remote 
closed connection */ if(retval == 0) { if(mca_oob_tcp_component.tcp_debug >= OOB_TCP_DEBUG_INFO) { - opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_peer_recv_blocking: " + opal_output(0, "%s-%s mca_oob_tcp_peer_recv_blocking: " "peer closed connection: peer state %d", - ORTE_NAME_ARGS(orte_process_info.my_name), - ORTE_NAME_ARGS(&(peer->peer_name)), + ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(&(peer->peer_name)), peer->peer_state); } mca_oob_tcp_peer_close(peer); @@ -739,10 +739,10 @@ static int mca_oob_tcp_peer_recv_blocking(mca_oob_tcp_peer_t* peer, int sd, void return -1; } else { opal_output(0, - "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_peer_recv_blocking: " + "%s-%s mca_oob_tcp_peer_recv_blocking: " "recv() failed: %s (%d)\n", - ORTE_NAME_ARGS(orte_process_info.my_name), - ORTE_NAME_ARGS(&(peer->peer_name)), + ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(&(peer->peer_name)), strerror(errno), errno); mca_oob_tcp_peer_close(peer); @@ -768,9 +768,9 @@ static int mca_oob_tcp_peer_send_blocking(mca_oob_tcp_peer_t* peer, int sd, void int retval = send(sd, (char *)ptr+cnt, size-cnt, 0); if(retval < 0) { if(opal_socket_errno != EINTR && opal_socket_errno != EAGAIN && opal_socket_errno != EWOULDBLOCK) { - opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_peer_send_blocking: send() failed: %s (%d)\n", - ORTE_NAME_ARGS(orte_process_info.my_name), - ORTE_NAME_ARGS(&(peer->peer_name)), + opal_output(0, "%s-%s mca_oob_tcp_peer_send_blocking: send() failed: %s (%d)\n", + ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(&(peer->peer_name)), strerror(opal_socket_errno), opal_socket_errno); mca_oob_tcp_peer_close(peer); @@ -835,9 +835,9 @@ static void mca_oob_tcp_peer_recv_handler(int sd, short flags, void* user) mca_oob_tcp_msg_t* msg; MCA_OOB_TCP_MSG_ALLOC(msg, rc); if(NULL == msg) { - opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_peer_recv_handler: unable to allocate recv message\n", - 
ORTE_NAME_ARGS(orte_process_info.my_name), - ORTE_NAME_ARGS(&(peer->peer_name))); + opal_output(0, "%s-%s mca_oob_tcp_peer_recv_handler: unable to allocate recv message\n", + ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(&(peer->peer_name))); return; } msg->msg_type = MCA_OOB_TCP_UNEXPECTED; @@ -865,9 +865,9 @@ static void mca_oob_tcp_peer_recv_handler(int sd, short flags, void* user) } default: { - opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_peer_recv_handler: invalid socket state(%d)", - ORTE_NAME_ARGS(orte_process_info.my_name), - ORTE_NAME_ARGS(&(peer->peer_name)), + opal_output(0, "%s-%s mca_oob_tcp_peer_recv_handler: invalid socket state(%d)", + ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(&(peer->peer_name)), peer->peer_state); mca_oob_tcp_peer_close(peer); break; @@ -913,9 +913,9 @@ static void mca_oob_tcp_peer_send_handler(int sd, short flags, void* user) break; } default: - opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_peer_send_handler: invalid connection state (%d)", - ORTE_NAME_ARGS(orte_process_info.my_name), - ORTE_NAME_ARGS(&(peer->peer_name)), + opal_output(0, "%s-%s mca_oob_tcp_peer_send_handler: invalid connection state (%d)", + ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(&(peer->peer_name)), peer->peer_state); opal_event_del(&peer->peer_send_event); break; @@ -979,9 +979,9 @@ static void mca_oob_tcp_peer_dump(mca_oob_tcp_peer_t* peer, const char* msg) nodelay = 0; #endif - sprintf(buff, "[%lu,%lu,%lu]-[%lu,%lu,%lu] %s: %s - %s nodelay %d sndbuf %d rcvbuf %d flags %08x\n", - ORTE_NAME_ARGS(orte_process_info.my_name), - ORTE_NAME_ARGS(&(peer->peer_name)), + sprintf(buff, "%s-%s %s: %s - %s nodelay %d sndbuf %d rcvbuf %d flags %08x\n", + ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(&(peer->peer_name)), msg, src, dst, nodelay, sndbuf, rcvbuf, flags); opal_output(0, buff); } @@ -1014,10 +1014,10 @@ bool mca_oob_tcp_peer_accept(mca_oob_tcp_peer_t* peer, int sd) 
mca_oob_tcp_peer_event_init(peer); if(mca_oob_tcp_peer_send_connect_ack(peer, sd) != ORTE_SUCCESS) { - opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_peer_accept: " + opal_output(0, "%s-%s mca_oob_tcp_peer_accept: " "mca_oob_tcp_peer_send_connect_ack failed\n", - ORTE_NAME_ARGS(orte_process_info.my_name), - ORTE_NAME_ARGS(&(peer->peer_name))); + ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(&(peer->peer_name))); mca_oob_tcp_peer_close(peer); OPAL_THREAD_UNLOCK(&peer->peer_lock); return false; diff --git a/orte/mca/oob/tcp/oob_tcp_ping.c b/orte/mca/oob/tcp/oob_tcp_ping.c index 0e1600209f..c50d9bd57c 100644 --- a/orte/mca/oob/tcp/oob_tcp_ping.c +++ b/orte/mca/oob/tcp/oob_tcp_ping.c @@ -88,9 +88,9 @@ mca_oob_tcp_ping(const orte_process_name_t* name, /* parse uri string */ if(ORTE_SUCCESS != (rc = mca_oob_tcp_parse_uri(uri, (struct sockaddr*) &inaddr))) { opal_output(0, - "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_ping: invalid uri: %s\n", - ORTE_NAME_ARGS(orte_process_info.my_name), - ORTE_NAME_ARGS(name), + "%s-%s mca_oob_tcp_ping: invalid uri: %s\n", + ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(name), uri); return rc; } @@ -99,9 +99,9 @@ mca_oob_tcp_ping(const orte_process_name_t* name, sd = socket(inaddr.ss_family, SOCK_STREAM, 0); if (sd < 0) { opal_output(0, - "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_ping: socket() failed: %s (%d)\n", - ORTE_NAME_ARGS(orte_process_info.my_name), - ORTE_NAME_ARGS(name), + "%s-%s mca_oob_tcp_ping: socket() failed: %s (%d)\n", + ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(name), strerror(opal_socket_errno), opal_socket_errno); return ORTE_ERR_UNREACH; @@ -109,17 +109,17 @@ mca_oob_tcp_ping(const orte_process_name_t* name, /* setup the socket as non-blocking */ if((flags = fcntl(sd, F_GETFL, 0)) < 0) { - opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_ping: fcntl(F_GETFL) failed: %s (%d)\n", - ORTE_NAME_ARGS(orte_process_info.my_name), - ORTE_NAME_ARGS(name), + 
opal_output(0, "%s-%s mca_oob_tcp_ping: fcntl(F_GETFL) failed: %s (%d)\n", + ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(name), strerror(opal_socket_errno), opal_socket_errno); } else { flags |= O_NONBLOCK; if(fcntl(sd, F_SETFL, flags) < 0) { - opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_ping: fcntl(F_SETFL) failed: %s (%d)\n", - ORTE_NAME_ARGS(orte_process_info.my_name), - ORTE_NAME_ARGS(name), + opal_output(0, "%s-%s mca_oob_tcp_ping: fcntl(F_SETFL) failed: %s (%d)\n", + ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(name), strerror(opal_socket_errno), opal_socket_errno); } @@ -141,9 +141,9 @@ mca_oob_tcp_ping(const orte_process_name_t* name, if(connect(sd, (struct sockaddr*)&inaddr, addrlen) < 0) { /* connect failed? */ if(opal_socket_errno != EINPROGRESS && opal_socket_errno != EWOULDBLOCK) { - opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_ping: connect failed: %s (%d)\n", - ORTE_NAME_ARGS(orte_process_info.my_name), - ORTE_NAME_ARGS(name), + opal_output(0, "%s-%s mca_oob_tcp_ping: connect failed: %s (%d)\n", + ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(name), strerror(opal_socket_errno), opal_socket_errno); CLOSE_THE_SOCKET(sd); @@ -163,9 +163,9 @@ mca_oob_tcp_ping(const orte_process_name_t* name, /* set socket back to blocking */ flags &= ~O_NONBLOCK; if(fcntl(sd, F_SETFL, flags) < 0) { - opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_ping: fcntl(F_SETFL) failed: %s (%d)\n", - ORTE_NAME_ARGS(orte_process_info.my_name), - ORTE_NAME_ARGS(name), + opal_output(0, "%s-%s mca_oob_tcp_ping: fcntl(F_SETFL) failed: %s (%d)\n", + ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(name), strerror(opal_socket_errno), opal_socket_errno); } diff --git a/orte/mca/oob/tcp/oob_tcp_send.c b/orte/mca/oob/tcp/oob_tcp_send.c index e84a1045b0..e5fdd2a7c0 100644 --- a/orte/mca/oob/tcp/oob_tcp_send.c +++ b/orte/mca/oob/tcp/oob_tcp_send.c @@ -117,9 +117,9 @@ int mca_oob_tcp_send_nb( } 
if(mca_oob_tcp_component.tcp_debug >= OOB_TCP_DEBUG_ALL) { - opal_output(0, "[%lu,%lu,%lu]-[%lu,%lu,%lu] mca_oob_tcp_send_nb: tag %d size %lu\n", - ORTE_NAME_ARGS(orte_process_info.my_name), - ORTE_NAME_ARGS(&(peer->peer_name)), + opal_output(0, "%s-%s mca_oob_tcp_send_nb: tag %d size %lu\n", + ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(&(peer->peer_name)), tag, (unsigned long)size ); } diff --git a/orte/mca/pls/base/pls_base_dmn_registry_fns.c b/orte/mca/pls/base/pls_base_dmn_registry_fns.c index 9551942379..e69de29bb2 100644 --- a/orte/mca/pls/base/pls_base_dmn_registry_fns.c +++ b/orte/mca/pls/base/pls_base_dmn_registry_fns.c @@ -1,376 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - * - */ - -#include "orte_config.h" -#include "orte/orte_constants.h" - -#include - -#include "opal/util/output.h" -#include "opal/util/argv.h" - -#include "orte/mca/ns/ns.h" -#include "orte/mca/gpr/gpr.h" -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/rmgr/rmgr.h" - -#include "orte/mca/pls/base/pls_private.h" - -static void orte_pls_daemon_info_construct(orte_pls_daemon_info_t* ptr) -{ - ptr->cell = ORTE_CELLID_INVALID; - ptr->nodename = NULL; - ptr->name = NULL; - ptr->active_job = ORTE_JOBID_INVALID; -} - -/* destructor - used to free any resources held by instance */ -static void orte_pls_daemon_info_destructor(orte_pls_daemon_info_t* ptr) -{ - if (NULL != ptr->nodename) free(ptr->nodename); - if (NULL != ptr->name) free(ptr->name); -} -OBJ_CLASS_INSTANCE(orte_pls_daemon_info_t, /* type name */ - opal_list_item_t, /* parent "class" name */ - orte_pls_daemon_info_construct, /* constructor */ - orte_pls_daemon_info_destructor); /* destructor */ - -/* - * Store the active daemons for a job - */ -int orte_pls_base_store_active_daemons(opal_list_t *daemons) -{ - orte_pls_daemon_info_t *dmn; - opal_list_item_t *item; - orte_gpr_value_t **values; - char *jobid_string, *key; - int rc, i, num_daemons; - - /* determine the number of daemons */ - num_daemons = (int)opal_list_get_size(daemons); - - if (0 == num_daemons) { - return ORTE_SUCCESS; - } - - /* since each daemon gets recorded in a separate node's container, - * we need to allocate space for num_daemons value objects - */ - values = (orte_gpr_value_t**)malloc(num_daemons * sizeof(orte_gpr_value_t*)); - if (NULL == values) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - memset(values, 0, num_daemons*sizeof(orte_gpr_value_t*)); /* NULL the array */ - - /* loop through the values and the list and create all the value objects */ - item = opal_list_get_first(daemons); - for (i=0; i < num_daemons; 
i++) { - dmn = (orte_pls_daemon_info_t*)item; - - if (ORTE_SUCCESS != (rc = orte_gpr.create_value(&values[i], - ORTE_GPR_OVERWRITE, - ORTE_NODE_SEGMENT, - 1, 0))) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - goto CLEANUP; - } - - if (ORTE_SUCCESS != (rc = orte_schema.get_node_tokens(&(values[i]->tokens), &(values[i]->num_tokens), - dmn->cell, dmn->nodename))) { - ORTE_ERROR_LOG(rc); - goto CLEANUP; - } - - /* setup the key */ - if (ORTE_SUCCESS != (rc = orte_ns.convert_jobid_to_string(&jobid_string, dmn->active_job))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(values[0]); - return rc; - } - asprintf(&key, "%s-%s", ORTE_NODE_BOOTPROXY_KEY, jobid_string); - free(jobid_string); - - if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(values[i]->keyvals[0]), key, ORTE_NAME, dmn->name))) { - ORTE_ERROR_LOG(rc); - goto CLEANUP; - } - free(key); - item = opal_list_get_next(item); - } - - rc = orte_gpr.put(num_daemons, values); - if (ORTE_SUCCESS != rc) { - ORTE_ERROR_LOG(rc); - } - -CLEANUP: - for (i=0; i < num_daemons; i++) { - if (NULL != values[i]) OBJ_RELEASE(values[i]); - } - if (NULL != values) free(values); - - return rc; -} - -static int get_daemons(opal_list_t *daemons, orte_jobid_t job) -{ - orte_gpr_value_t **values; - orte_gpr_keyval_t *kv; - orte_std_cntr_t cnt, i, j; - char* jobid_string; - char *keys[] = { - NULL, /* placeholder */ - ORTE_NODE_NAME_KEY, - ORTE_CELLID_KEY, - NULL - }; - orte_cellid_t *cell; - char *nodename; - orte_process_name_t *name; - orte_pls_daemon_info_t *dmn, *dmn2; - bool found_name, found_node, found_cell; - opal_list_item_t *item; - bool check_dups; - int rc; - - /* check the list to see if there is anything already on it. If there is, then - * we will need to check for duplicate entries before we add something. 
If not, - * then this can go a lot faster - */ - if (0 < opal_list_get_size(daemons)) { - check_dups = true; - } else { - check_dups = false; - } - - /* setup the key */ - if (ORTE_SUCCESS != (rc = orte_ns.convert_jobid_to_string(&jobid_string, job))) { - ORTE_ERROR_LOG(rc); - return rc; - } - asprintf(&keys[0], "%s-%s", ORTE_NODE_BOOTPROXY_KEY, jobid_string); - free(jobid_string); - - /* query the daemon info */ - if (ORTE_SUCCESS != (rc = orte_gpr.get(ORTE_GPR_KEYS_OR|ORTE_GPR_TOKENS_OR, - ORTE_NODE_SEGMENT, - NULL, /* all containers */ - keys, - &cnt, &values))) { - ORTE_ERROR_LOG(rc); - free(keys[0]); - return rc; - } - - /* loop through the answers and construct the list */ - for (i=0; i < cnt; i++) { - /* for systems such as bproc, the node segment holds containers - * for nodes that we may not have launched upon. Each container - * will send us back a value object, so we have to ensure here - * that we only create daemon objects on the list for those nodes - * that DO provide a valid object - */ - found_name = found_node = found_cell = false; - for (j=0; j < values[i]->cnt; j++) { - kv = values[i]->keyvals[j]; - if (0 == strcmp(kv->key, keys[0])) { - if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&name, kv->value, ORTE_NAME))) { - ORTE_ERROR_LOG(rc); - goto CLEANUP; - } - found_name = true; - continue; - } - if (0 == strcmp(kv->key, ORTE_NODE_NAME_KEY)) { - if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&nodename, kv->value, ORTE_STRING))) { - ORTE_ERROR_LOG(rc); - goto CLEANUP; - } - found_node = true; - continue; - } - if (0 == strcmp(kv->key, ORTE_CELLID_KEY)) { - if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&cell, kv->value, ORTE_CELLID))) { - ORTE_ERROR_LOG(rc); - goto CLEANUP; - } - found_cell = true; - continue; - } - } - /* if we found everything, then this is a valid entry */ - if (found_name && found_node && found_cell) { - /* first check if this name is ourself - if so, ignore it */ - if (ORTE_EQUAL == orte_dss.compare(name, ORTE_PROC_MY_NAME, 
ORTE_NAME)) { - goto MOVEON; - } - - if (check_dups) { - /* see if this daemon is already on the list - if so, then we don't add it */ - for (item = opal_list_get_first(daemons); - item != opal_list_get_end(daemons); - item = opal_list_get_next(item)) { - dmn2 = (orte_pls_daemon_info_t*)item; - - if (ORTE_EQUAL == orte_dss.compare(dmn2->name, name, ORTE_NAME)) { - /* already on list - ignore it */ - goto MOVEON; - } - } - } - dmn = OBJ_NEW(orte_pls_daemon_info_t); - if (NULL == dmn) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - rc = ORTE_ERR_OUT_OF_RESOURCE; - goto CLEANUP; - } - if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&(dmn->name), name, ORTE_NAME))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(dmn); - goto CLEANUP; - } - dmn->cell = *cell; - if (NULL != nodename) { - dmn->nodename = strdup(nodename); - } - - /* add this daemon to the list */ - opal_list_append(daemons, &dmn->super); - } -MOVEON: - OBJ_RELEASE(values[i]); - } - -CLEANUP: - for (i=0; i < cnt; i++) { - if (NULL != values[i]) OBJ_RELEASE(values[i]); - } - if (NULL != values) free(values); - free(keys[0]); - - return rc; -} - -/* - * Retrieve a list of the active daemons for a job - */ -int orte_pls_base_get_active_daemons(opal_list_t *daemons, orte_jobid_t job, opal_list_t *attrs) -{ - orte_jobid_t *jobs; - orte_std_cntr_t njobs, i; - bool allocated; - int rc; - - if (NULL != orte_rmgr.find_attribute(attrs, ORTE_NS_INCLUDE_DESCENDANTS)) { - /* need to include all descendants in list */ - if (ORTE_SUCCESS != (rc = orte_ns.get_job_descendants(&jobs, &njobs, job))) { - ORTE_ERROR_LOG(rc); - return rc; - } - allocated = true; - } else if (NULL != orte_rmgr.find_attribute(attrs, ORTE_NS_INCLUDE_CHILDREN)) { - /* just include the direct children of the job */ - if (ORTE_SUCCESS != (rc = orte_ns.get_job_children(&jobs, &njobs, job))) { - ORTE_ERROR_LOG(rc); - return rc; - } - allocated = true; - } else { - /* just want daemons for this one job */ - jobs = &job; - njobs = 1; - allocated = false; - } - - 
/* loop through all the jobs and get their info */ - for (i=0; i < njobs; i++) { - if (ORTE_SUCCESS != (rc = get_daemons(daemons, jobs[i]))) { - ORTE_ERROR_LOG(rc); - goto CLEANUP; - } - } - -CLEANUP: - if (allocated) free(jobs); - - return ORTE_SUCCESS; -} - -/* - * Remove a daemon from the world of active daemons - */ -int orte_pls_base_remove_daemon(orte_pls_daemon_info_t *info) -{ - /* We need to do a registry - * delete function call targeting the entry - */ - - return ORTE_SUCCESS; -} - - -/* - * Check for available daemons we can re-use - */ -int orte_pls_base_check_avail_daemons(opal_list_t *daemons, - orte_jobid_t job) -{ - orte_jobid_t root, *descendants; - orte_std_cntr_t i, ndesc; - int rc; - - /* check for daemons belonging to any job in this job's family. - * Since the jobs in any family must exit together, it is reasonable - * for us to reuse any daemons that were spawned by any member - * of our extended family. We can find all of our family members - * by first finding our root job, and then getting all of its - * descendants - */ - if (ORTE_SUCCESS != (rc = orte_ns.get_root_job(&root, job))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - if (ORTE_SUCCESS != (rc = orte_ns.get_job_descendants(&descendants, &ndesc, root))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* loop through the descendants, adding to the daemon list as we go */ - for (i=0; i < ndesc; i++) { - if (ORTE_SUCCESS != (rc = orte_pls_base_get_active_daemons(daemons, descendants[i], NULL))) { - ORTE_ERROR_LOG(rc); - free(descendants); - return rc; - } - } - free(descendants); /* all done with these */ - - /* now add in any persistent daemons - they are tagged as bootproxies - * for jobid = 0 - */ - if (ORTE_SUCCESS != (rc = orte_pls_base_get_active_daemons(daemons, 0, NULL))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - return ORTE_SUCCESS; -} - diff --git a/orte/mca/pls/bproc/pls_bproc.c b/orte/mca/pls/bproc/pls_bproc.c index 07c601c09e..114fe9adca 100644 --- 
a/orte/mca/pls/bproc/pls_bproc.c +++ b/orte/mca/pls/bproc/pls_bproc.c @@ -423,16 +423,6 @@ static void orte_pls_bproc_setup_env(char *** env) /** * Launches the daemons - * @param cellid the cellid of the job - * @param envp a pointer to the environment to use for the daemons - * @param node_arrays an array that holds the node arrays for each app context - * @param node_array_lens an array of lengths of the node arrays - * @param num_contexts the number of application contexts - * @param num_procs the numer of processes in the job - * @param global_vpid_start the starting vpid for the user's processes - * @param jobid the jobid for the user processes - * @retval ORTE_SUCCESS - * @retval error */ /* When working in this function, ALWAYS jump to "cleanup" if * you encounter an error so that orterun will be woken up and @@ -529,7 +519,7 @@ static int orte_pls_bproc_launch_daemons(orte_job_map_t *map, char ***envp) { free(var); /* set up the base environment so the daemons can get their names once launched */ - rc = orte_ns_nds_bproc_put(ORTE_PROC_MY_NAME->cellid, 0, map->daemon_vpid_start, + rc = orte_ns_nds_bproc_put(0, map->daemon_vpid_start, 0, num_daemons, ORTE_VPID_INVALID, 1, envp); if(ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); @@ -636,7 +626,7 @@ static int orte_pls_bproc_launch_daemons(orte_job_map_t *map, char ***envp) { ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); goto cleanup; } - rc = orte_pls_bproc_set_node_pid(ORTE_PROC_MY_NAME->cellid, param, map->job, pids[i]); + rc = orte_pls_bproc_set_node_pid(param, map->job, pids[i]); if(ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); goto cleanup; @@ -774,17 +764,6 @@ orte_pls_bproc_node_failed(orte_gpr_notify_message_t *msg) /** * Launches the application processes - * @param cellid the cellid of the job - * @param jobid the jobid of the job - * @param map a pointer to the mapping of this application - * @param num_processes the number of processes in this job - * @param vpid_start the starting vpid for this app context - 
* @param global_vpid_start the starting vpid for the user's processes - * @param app_context the application context number - * @param node_array the node array for this context - * @param node_array_len the length of the node array - * @retval ORTE_SUCCESS - * @retval error */ /* When working in this function, ALWAYS jump to "cleanup" if @@ -908,7 +887,7 @@ static int orte_pls_bproc_launch_app(orte_job_map_t* map, int num_slots, } /* setup environment so the procs can figure out their names */ - rc = orte_ns_nds_bproc_put(ORTE_PROC_MY_NAME->cellid, map->job, vpid_start, map->vpid_start, + rc = orte_ns_nds_bproc_put(map->job, vpid_start, map->vpid_start, num_processes, i, num_cycles, &env); if(ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); @@ -964,7 +943,7 @@ static int orte_pls_bproc_launch_app(orte_job_map_t* map, int num_slots, ORTE_ERROR_LOG(rc); goto cleanup; } else { - rc = orte_ns.create_process_name(&proc_name, ORTE_PROC_MY_NAME->cellid, map->job, + rc = orte_ns.create_process_name(&proc_name, map->job, vpid_start + j*stride); if(ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); diff --git a/orte/mca/pls/bproc/pls_bproc.h b/orte/mca/pls/bproc/pls_bproc.h index eaebd74b55..ad541d3d5b 100644 --- a/orte/mca/pls/bproc/pls_bproc.h +++ b/orte/mca/pls/bproc/pls_bproc.h @@ -93,7 +93,7 @@ ORTE_DECLSPEC int orte_pls_bproc_get_proc_pids(orte_jobid_t jobid, pid_t** pids, /** * Utility routine to get/set daemon pid */ -ORTE_DECLSPEC int orte_pls_bproc_set_node_pid(orte_cellid_t cellid, char* node_name, orte_jobid_t jobid, pid_t pid); +ORTE_DECLSPEC int orte_pls_bproc_set_node_pid(char* node_name, orte_jobid_t jobid, pid_t pid); ORTE_DECLSPEC int orte_pls_bproc_get_node_pids(orte_jobid_t jobid, pid_t** pids, orte_std_cntr_t* num_pids); /* utility functions for abort communications */ diff --git a/orte/mca/pls/bproc/pls_bproc_state.c b/orte/mca/pls/bproc/pls_bproc_state.c index e5adcd361b..e0525a5c8e 100644 --- a/orte/mca/pls/bproc/pls_bproc_state.c +++ 
b/orte/mca/pls/bproc/pls_bproc_state.c @@ -188,7 +188,7 @@ cleanup: * the daemons. */ -int orte_pls_bproc_set_node_pid(orte_cellid_t cellid, char* node_name, orte_jobid_t jobid, pid_t pid) +int orte_pls_bproc_set_node_pid(char* node_name, orte_jobid_t jobid, pid_t pid) { orte_gpr_value_t *values[1]; char *jobid_string, *key; @@ -202,7 +202,7 @@ int orte_pls_bproc_set_node_pid(orte_cellid_t cellid, char* node_name, orte_jobi return ORTE_ERR_OUT_OF_RESOURCE; } - if (ORTE_SUCCESS != (rc = orte_schema.get_node_tokens(&(values[0]->tokens), &(values[0]->num_tokens), cellid, node_name))) { + if (ORTE_SUCCESS != (rc = orte_schema.get_node_tokens(&(values[0]->tokens), &(values[0]->num_tokens), node_name))) { ORTE_ERROR_LOG(rc); OBJ_RELEASE(values[0]); return rc; diff --git a/orte/mca/pls/lsf/pls_lsf_module.c b/orte/mca/pls/lsf/pls_lsf_module.c index 4e046ef7a9..260c61e2c8 100644 --- a/orte/mca/pls/lsf/pls_lsf_module.c +++ b/orte/mca/pls/lsf/pls_lsf_module.c @@ -220,7 +220,6 @@ static int pls_lsf_launch_job(orte_jobid_t jobid) /* tell the new daemons the base of the name list so they can compute * their own name on the other end */ - name.cellid = ORTE_PROC_MY_NAME->cellid; name.jobid = 0; name.vpid = map->daemon_vpid_start; rc = orte_ns.get_proc_name_string(&name_string, &name); diff --git a/orte/mca/pls/poe/pls_poe_module.c b/orte/mca/pls/poe/pls_poe_module.c index 24a1219000..a0bce751ce 100644 --- a/orte/mca/pls/poe/pls_poe_module.c +++ b/orte/mca/pls/poe/pls_poe_module.c @@ -214,7 +214,7 @@ int pls_poe_launch_interactive_orted(orte_jobid_t jobid) fprintf(hfp,"%s\n",node->node_name); /* initialize daemons process name */ - rc = orte_ns.create_process_name(&name, node->node_cellid, 0, vpid); + rc = orte_ns.create_process_name(&name, 0, vpid); if(ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); goto cleanup; diff --git a/orte/mca/pls/slurm/pls_slurm_module.c b/orte/mca/pls/slurm/pls_slurm_module.c index c85c49ec4a..20cffbdd79 100644 --- a/orte/mca/pls/slurm/pls_slurm_module.c +++ 
b/orte/mca/pls/slurm/pls_slurm_module.c @@ -256,7 +256,6 @@ static int pls_slurm_launch_job(orte_jobid_t jobid) /* tell the new daemons the base of the name list so they can compute * their own name on the other end */ - name.cellid = ORTE_PROC_MY_NAME->cellid; name.jobid = 0; name.vpid = map->daemon_vpid_start; rc = orte_ns.get_proc_name_string(&name_string, &name); diff --git a/orte/mca/pls/xcpu/pls_xcpu.c b/orte/mca/pls/xcpu/pls_xcpu.c index 51e07c86ab..b224d42334 100644 --- a/orte/mca/pls/xcpu/pls_xcpu.c +++ b/orte/mca/pls/xcpu/pls_xcpu.c @@ -240,7 +240,6 @@ orte_pls_xcpu_launch_job(orte_jobid_t jobid) { int i, fanout, rc; int num_processes = 0; - orte_cellid_t cellid; opal_list_item_t *node_item, *proc_item; orte_job_map_t *map; orte_vpid_t vpid_start, vpid_range; @@ -261,9 +260,6 @@ orte_pls_xcpu_launch_job(orte_jobid_t jobid) return rc; } - /* get the cellid */ - cellid = orte_process_info.my_name->cellid; - /* create num_apps of pointers to Xpnodeset and Xpcommand */ node_sets = (Xpnodeset **) malloc(num_apps * sizeof(Xpnodeset *)); xcmd_sets = (Xpcommand **) malloc(num_apps * sizeof(Xpcommand *)); @@ -298,7 +294,7 @@ orte_pls_xcpu_launch_job(orte_jobid_t jobid) } for (i = 0; i < num_apps; i++) { - rc = orte_ns_nds_xcpu_put(cellid, jobid, vpid_start, + rc = orte_ns_nds_xcpu_put(jobid, vpid_start, num_processes, &map->apps[i]->env); if (rc != ORTE_SUCCESS) { ORTE_ERROR_LOG(rc); diff --git a/orte/mca/ras/base/data_type_support/ras_data_type_compare_fns.c b/orte/mca/ras/base/data_type_support/ras_data_type_compare_fns.c index 604f792678..b8be9d69e1 100644 --- a/orte/mca/ras/base/data_type_support/ras_data_type_compare_fns.c +++ b/orte/mca/ras/base/data_type_support/ras_data_type_compare_fns.c @@ -33,10 +33,7 @@ int orte_ras_base_compare_node(orte_ras_node_t *value1, orte_ras_node_t *value2, { int test; - if (value1->node_cellid > value2->node_cellid) return ORTE_VALUE1_GREATER; - if (value2->node_cellid > value1->node_cellid) return ORTE_VALUE2_GREATER; - - 
/** same cell - check node names */ + /** check node names */ test = strcmp(value1->node_name, value2->node_name); if (0 == test) return ORTE_EQUAL; if (0 < test) return ORTE_VALUE2_GREATER; diff --git a/orte/mca/ras/base/data_type_support/ras_data_type_copy_fns.c b/orte/mca/ras/base/data_type_support/ras_data_type_copy_fns.c index 2c2022d0a8..a740ffc481 100644 --- a/orte/mca/ras/base/data_type_support/ras_data_type_copy_fns.c +++ b/orte/mca/ras/base/data_type_support/ras_data_type_copy_fns.c @@ -47,7 +47,6 @@ int orte_ras_base_copy_node(orte_ras_node_t **dest, orte_ras_node_t *src, orte_d if (NULL != src->node_name) (*dest)->node_name = strdup(src->node_name); (*dest)->launch_id = src->launch_id; if (NULL != src->node_arch) (*dest)->node_arch = strdup(src->node_arch); - (*dest)->node_cellid = src->node_cellid; (*dest)->node_state = src->node_state; (*dest)->node_slots = src->node_slots; (*dest)->node_slots_inuse = src->node_slots_inuse; diff --git a/orte/mca/ras/base/data_type_support/ras_data_type_packing_fns.c b/orte/mca/ras/base/data_type_support/ras_data_type_packing_fns.c index 4103cb4be2..4b028cc200 100644 --- a/orte/mca/ras/base/data_type_support/ras_data_type_packing_fns.c +++ b/orte/mca/ras/base/data_type_support/ras_data_type_packing_fns.c @@ -65,13 +65,6 @@ int orte_ras_base_pack_node(orte_buffer_t *buffer, const void *src, return rc; } - /* pack the cellid */ - if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, - (void*)(&(nodes[i]->node_cellid)), 1, ORTE_CELLID))) { - ORTE_ERROR_LOG(rc); - return rc; - } - /* pack the state */ if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, (void*)(&(nodes[i]->node_state)), 1, ORTE_NODE_STATE))) { diff --git a/orte/mca/ras/base/data_type_support/ras_data_type_print_fns.c b/orte/mca/ras/base/data_type_support/ras_data_type_print_fns.c index bd02f4981b..42f237f605 100644 --- a/orte/mca/ras/base/data_type_support/ras_data_type_print_fns.c +++ b/orte/mca/ras/base/data_type_support/ras_data_type_print_fns.c @@ 
-48,8 +48,8 @@ int orte_ras_base_print_node(char **output, char *prefix, orte_ras_node_t *src, asprintf(&pfx2, "%s", prefix); } - asprintf(&tmp, "%sData for node: cellid: %lu\tName: %s\tLaunch id: %ld", - pfx2, (unsigned long)src->node_cellid, src->node_name, (long)src->launch_id); + asprintf(&tmp, "%sData for node: Name: %s\tLaunch id: %ld", + pfx2, src->node_name, (long)src->launch_id); asprintf(&tmp2, "%s\n%s\tArch: %s\tState: %lu", tmp, pfx2, src->node_arch, (unsigned long)src->node_state); diff --git a/orte/mca/ras/base/data_type_support/ras_data_type_unpacking_fns.c b/orte/mca/ras/base/data_type_support/ras_data_type_unpacking_fns.c index c42d44208f..9db9f110f2 100644 --- a/orte/mca/ras/base/data_type_support/ras_data_type_unpacking_fns.c +++ b/orte/mca/ras/base/data_type_support/ras_data_type_unpacking_fns.c @@ -77,14 +77,6 @@ int orte_ras_base_unpack_node(orte_buffer_t *buffer, void *dest, return rc; } - /* unpack the cellid */ - n = 1; - if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer, - (&(nodes[i]->node_cellid)), &n, ORTE_CELLID))) { - ORTE_ERROR_LOG(rc); - return rc; - } - /* unpack the state */ n = 1; if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer, diff --git a/orte/mca/ras/base/ras_base_no_ops.c b/orte/mca/ras/base/ras_base_no_ops.c index 7c095e1b67..46cd1dbcd5 100644 --- a/orte/mca/ras/base/ras_base_no_ops.c +++ b/orte/mca/ras/base/ras_base_no_ops.c @@ -51,7 +51,7 @@ int orte_ras_base_proc_query_alloc_no_op(opal_list_t* list) return ORTE_ERR_NOT_SUPPORTED; } -orte_ras_node_t* orte_ras_base_node_lookup_no_op(orte_cellid_t cell, const char* nodename) +orte_ras_node_t* orte_ras_base_node_lookup_no_op(const char* nodename) { return NULL; } diff --git a/orte/mca/ras/base/ras_base_node.c b/orte/mca/ras/base/ras_base_node.c index 0b8acb94d5..6dc000ba80 100644 --- a/orte/mca/ras/base/ras_base_node.c +++ b/orte/mca/ras/base/ras_base_node.c @@ -37,7 +37,6 @@ static void orte_ras_base_node_construct(orte_ras_node_t* node) node->node_name = 
NULL; node->launch_id = -1; node->node_arch = NULL; - node->node_cellid = 0; node->node_state = ORTE_NODE_STATE_UNKNOWN; node->node_slots = 0; node->node_slots_inuse = 0; @@ -108,12 +107,10 @@ int orte_ras_base_node_query(opal_list_t* nodes) ORTE_NODE_SLOTS_ALLOC_KEY, ORTE_NODE_SLOTS_MAX_KEY, ORTE_NODE_USERNAME_KEY, - ORTE_CELLID_KEY, NULL }; orte_std_cntr_t i, cnt, *sptr; orte_node_state_t *nsptr; - orte_cellid_t *cptr; int32_t *i32; orte_gpr_value_t** values; int rc; @@ -217,14 +214,6 @@ int orte_ras_base_node_query(opal_list_t* nodes) } continue; } - if(strcmp(keyval->key, ORTE_CELLID_KEY) == 0) { - if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&cptr, keyval->value, ORTE_CELLID))) { - ORTE_ERROR_LOG(rc); - continue; - } - node->node_cellid = *cptr; - continue; - } } opal_list_append(nodes, &node->super); OBJ_RELEASE(value); @@ -324,7 +313,6 @@ int orte_ras_base_node_query_alloc(opal_list_t* nodes, orte_jobid_t jobid) ORTE_NODE_SLOTS_ALLOC_KEY, ORTE_NODE_SLOTS_MAX_KEY, ORTE_NODE_USERNAME_KEY, - ORTE_CELLID_KEY, NULL }; orte_std_cntr_t i, cnt, keys_len; @@ -332,7 +320,6 @@ int orte_ras_base_node_query_alloc(opal_list_t* nodes, orte_jobid_t jobid) char* jobid_str; orte_std_cntr_t *sptr; orte_node_state_t *nsptr; - orte_cellid_t *cptr; int32_t *i32; int rc, alloc_key_posn=5; @@ -459,15 +446,7 @@ int orte_ras_base_node_query_alloc(opal_list_t* nodes, orte_jobid_t jobid) } continue; } - if(strcmp(keyval->key, ORTE_CELLID_KEY) == 0) { - if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&cptr, keyval->value, ORTE_CELLID))) { - ORTE_ERROR_LOG(rc); - continue; - } - node->node_cellid = *cptr; - continue; - } - } + } /* check to see if any slots were reserved on this node for us * The "get" command will return data from ALL nodes on the node * segment. 
We ONLY want to include here nodes that are assigned @@ -494,7 +473,7 @@ int orte_ras_base_node_query_alloc(opal_list_t* nodes, orte_jobid_t jobid) * Query the registry for a specific node */ -orte_ras_node_t* orte_ras_base_node_lookup(orte_cellid_t cellid, const char* node_name) +orte_ras_node_t* orte_ras_base_node_lookup(const char* node_name) { char* keys[] = { ORTE_NODE_NAME_KEY, @@ -506,20 +485,18 @@ orte_ras_node_t* orte_ras_base_node_lookup(orte_cellid_t cellid, const char* nod ORTE_NODE_SLOTS_ALLOC_KEY, ORTE_NODE_SLOTS_MAX_KEY, ORTE_NODE_USERNAME_KEY, - ORTE_CELLID_KEY, NULL }; orte_ras_node_t* node = NULL; orte_std_cntr_t i, cnt, num_tokens; orte_std_cntr_t *sptr; - orte_cellid_t *cptr; orte_node_state_t *nsptr; int32_t *i32; orte_gpr_value_t** values; char** tokens = NULL; int rc; - rc = orte_schema.get_node_tokens(&tokens, &num_tokens, cellid, (char*)node_name); + rc = orte_schema.get_node_tokens(&tokens, &num_tokens, (char*)node_name); if(ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return NULL; @@ -624,14 +601,6 @@ orte_ras_node_t* orte_ras_base_node_lookup(orte_cellid_t cellid, const char* nod } continue; } - if(strcmp(keyval->key, ORTE_CELLID_KEY) == 0) { - if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&cptr, keyval->value, ORTE_CELLID))) { - ORTE_ERROR_LOG(rc); - continue; - } - node->node_cellid = *cptr; - continue; - } } OBJ_RELEASE(values[i]); break; @@ -658,7 +627,6 @@ int orte_ras_base_node_insert(opal_list_t* nodes) ORTE_NODE_LAUNCH_ID_KEY, ORTE_NODE_ARCH_KEY, ORTE_NODE_STATE_KEY, - ORTE_CELLID_KEY, ORTE_NODE_SLOTS_KEY, ORTE_NODE_SLOTS_IN_USE_KEY, ORTE_NODE_SLOTS_MAX_KEY, @@ -669,7 +637,6 @@ int orte_ras_base_node_insert(opal_list_t* nodes) ORTE_INT32, ORTE_STRING, ORTE_NODE_STATE, - ORTE_CELLID, ORTE_STD_CNTR, ORTE_STD_CNTR, ORTE_STD_CNTR, @@ -699,7 +666,7 @@ int orte_ras_base_node_insert(opal_list_t* nodes) for (i=0; i < num_values; i++) { if (ORTE_SUCCESS != (rc = orte_gpr.create_value(&(values[i]), ORTE_GPR_OVERWRITE | ORTE_GPR_TOKENS_AND, - 
ORTE_NODE_SEGMENT, 9, 0))) { + ORTE_NODE_SEGMENT, 8, 0))) { ORTE_ERROR_LOG(rc); for (j=0; j < i; j++) { OBJ_RELEASE(values[j]); @@ -738,12 +705,6 @@ int orte_ras_base_node_insert(opal_list_t* nodes) goto cleanup; } - ++j; - if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(values[i]->keyvals[j]), keys[j], types[j], &(node->node_cellid)))) { - ORTE_ERROR_LOG(rc); - goto cleanup; - } - ++j; if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(values[i]->keyvals[j]), keys[j], types[j], &(node->node_slots)))) { ORTE_ERROR_LOG(rc); @@ -769,7 +730,7 @@ int orte_ras_base_node_insert(opal_list_t* nodes) } /* setup index/keys for this node */ - rc = orte_schema.get_node_tokens(&(values[i]->tokens), &(values[i]->num_tokens), node->node_cellid, node->node_name); + rc = orte_schema.get_node_tokens(&(values[i]->tokens), &(values[i]->num_tokens), node->node_name); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); goto cleanup; @@ -791,11 +752,11 @@ cleanup: -int orte_ras_base_proc_insert(opal_list_t* procs, orte_cellid_t cellid, orte_jobid_t jobid) +int orte_ras_base_proc_insert(opal_list_t* procs, orte_jobid_t jobid) { opal_list_item_t* item; orte_gpr_value_t **values; - orte_process_name_t *proc_name; + orte_process_name_t proc_name; int rc; orte_std_cntr_t num_values, i, j; char *keys[] = { @@ -842,6 +803,8 @@ int orte_ras_base_proc_insert(opal_list_t* procs, orte_cellid_t cellid, orte_job return rc; } } + + proc_name.jobid = jobid; for(i=0, item = opal_list_get_first(procs); i < num_values && item != opal_list_get_end(procs); i++, item = opal_list_get_next(item)) { @@ -866,14 +829,10 @@ int orte_ras_base_proc_insert(opal_list_t* procs, orte_cellid_t cellid, orte_job } ++j; - rc = orte_ns.create_process_name(&proc_name, cellid, jobid, i); - if (ORTE_SUCCESS != rc) { - ORTE_ERROR_LOG(rc); - goto cleanup; - } /* setup index/keys for this node */ - rc = orte_schema.get_proc_tokens(&(values[i]->tokens), &(values[i]->num_tokens), proc_name); + proc_name.vpid = (orte_vpid_t)i; + rc = 
orte_schema.get_proc_tokens(&(values[i]->tokens), &(values[i]->num_tokens), &proc_name); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); goto cleanup; @@ -918,7 +877,7 @@ int orte_ras_base_node_delete(opal_list_t* nodes) node = (orte_ras_node_t*)item; /* setup index/keys for this node */ - rc = orte_schema.get_node_tokens(&tokens, &num_tokens, node->node_cellid, node->node_name); + rc = orte_schema.get_node_tokens(&tokens, &num_tokens, node->node_name); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); return rc; @@ -999,7 +958,7 @@ int orte_ras_base_node_assign(opal_list_t* nodes, orte_jobid_t jobid) continue; /* setup index/keys for this node */ - rc = orte_schema.get_node_tokens(&(values[i]->tokens), &(values[i]->num_tokens), node->node_cellid, node->node_name); + rc = orte_schema.get_node_tokens(&(values[i]->tokens), &(values[i]->num_tokens), node->node_name); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); free(jobid_str); diff --git a/orte/mca/ras/base/ras_private.h b/orte/mca/ras/base/ras_private.h index 892a9e3d5c..04a8323048 100644 --- a/orte/mca/ras/base/ras_private.h +++ b/orte/mca/ras/base/ras_private.h @@ -71,7 +71,7 @@ int orte_ras_base_node_query_alloc_no_op(opal_list_t*, orte_jobid_t); int orte_ras_base_proc_query_alloc_no_op(opal_list_t* procs); -orte_ras_node_t* orte_ras_base_node_lookup_no_op(orte_cellid_t, const char* nodename); +orte_ras_node_t* orte_ras_base_node_lookup_no_op(const char* nodename); /* * Internal support functions @@ -92,7 +92,7 @@ ORTE_DECLSPEC int orte_ras_base_node_query(opal_list_t*); /* * Query the registry for a specific node */ -ORTE_DECLSPEC orte_ras_node_t* orte_ras_base_node_lookup(orte_cellid_t, const char* nodename); +ORTE_DECLSPEC orte_ras_node_t* orte_ras_base_node_lookup(const char* nodename); /** * Query the registry for all nodes allocated to a specific job @@ -106,7 +106,7 @@ ORTE_DECLSPEC int orte_ras_base_proc_query_alloc(opal_list_t* procs); */ ORTE_DECLSPEC int orte_ras_base_node_insert(opal_list_t*); 
-ORTE_DECLSPEC int orte_ras_base_proc_insert(opal_list_t* procs, orte_cellid_t cellid, orte_jobid_t jobid); +ORTE_DECLSPEC int orte_ras_base_proc_insert(opal_list_t* procs, orte_jobid_t jobid); /** * Delete the specified nodes from the registry diff --git a/orte/mca/ras/dash_host/ras_dash_host_module.c b/orte/mca/ras/dash_host/ras_dash_host_module.c index 006f9bb444..60ec316cc0 100644 --- a/orte/mca/ras/dash_host/ras_dash_host_module.c +++ b/orte/mca/ras/dash_host/ras_dash_host_module.c @@ -149,9 +149,6 @@ static int orte_ras_dash_host_allocate(orte_jobid_t jobid, opal_list_t *attribut node->node_name = strdup(mapped_nodes[i]); node->node_arch = NULL; node->node_state = ORTE_NODE_STATE_UP; - /* JMS: this should not be hard-wired to 0, but there's no - other value to put it to [yet]... */ - node->node_cellid = 0; node->node_slots_inuse = 0; node->node_slots_max = 0; node->node_slots = 1; diff --git a/orte/mca/ras/gridengine/ras_gridengine_module.c b/orte/mca/ras/gridengine/ras_gridengine_module.c index aee3489348..fbb4a49c26 100644 --- a/orte/mca/ras/gridengine/ras_gridengine_module.c +++ b/orte/mca/ras/gridengine/ras_gridengine_module.c @@ -198,7 +198,6 @@ static int orte_ras_gridengine_discover(opal_list_t* nodelist, node->node_name = strdup(ptr); node->node_arch = strdup(arch); node->node_state = ORTE_NODE_STATE_UP; - node->node_cellid = 0; node->node_slots_inuse = 0; node->node_slots_max = 0; node->node_slots = (int)strtol(num, (char **)NULL, 10); @@ -303,7 +302,7 @@ static int put_slot_keyval(orte_ras_node_t* node, int slot_cnt) /* get token */ if (ORTE_SUCCESS != (rc = orte_schema.get_node_tokens(&tokens, - &num_tokens, node->node_cellid, node->node_name))) { + &num_tokens, node->node_name))) { ORTE_ERROR_LOG(rc); return rc; } @@ -337,7 +336,7 @@ static int get_slot_keyval(orte_ras_node_t* node, int* slot_cnt) { /* get token */ if (ORTE_SUCCESS != (rc = orte_schema.get_node_tokens(&tokens, - &num_tokens, node->node_cellid, node->node_name))) { + &num_tokens, 
node->node_name))) { ORTE_ERROR_LOG(rc); goto cleanup; } diff --git a/orte/mca/ras/loadleveler/ras_loadleveler_module.c b/orte/mca/ras/loadleveler/ras_loadleveler_module.c index 94e55ab597..249e378275 100644 --- a/orte/mca/ras/loadleveler/ras_loadleveler_module.c +++ b/orte/mca/ras/loadleveler/ras_loadleveler_module.c @@ -119,7 +119,6 @@ static int orte_ras_loadleveler_allocate(orte_jobid_t jobid, opal_list_t *attrib node->node_name = strdup(hostlist[i]); node->node_arch = NULL; node->node_state = ORTE_NODE_STATE_UP; - node->node_cellid = 0; node->node_slots_inuse = 0; node->node_slots_max = 0; node->node_slots = 1; diff --git a/orte/mca/ras/localhost/ras_localhost_module.c b/orte/mca/ras/localhost/ras_localhost_module.c index de83b25839..97ccace534 100644 --- a/orte/mca/ras/localhost/ras_localhost_module.c +++ b/orte/mca/ras/localhost/ras_localhost_module.c @@ -102,9 +102,6 @@ static int orte_ras_localhost_allocate(orte_jobid_t jobid, opal_list_t *attribut node->node_name = strdup(orte_system_info.nodename); node->node_arch = NULL; node->node_state = ORTE_NODE_STATE_UP; - /* JMS: this should not be hard-wired to 0, but there's no - other value to put it to [yet]... */ - node->node_cellid = 0; node->node_slots_inuse = 0; node->node_slots_max = 0; node->node_slots = 1; diff --git a/orte/mca/ras/ras.h b/orte/mca/ras/ras.h index 446ef1ec5c..116cf9d18c 100644 --- a/orte/mca/ras/ras.h +++ b/orte/mca/ras/ras.h @@ -100,9 +100,6 @@ * beginning at node 512, then the RAS can derive the nodenames from * this information). * - * - cellid: the id assigned to the cell within which this node - * exists. This id is provided in the ORTE_RESOURCE_SEGMENT. - * * For each node, the RAS stores the following information on the segment: * * - number of cpus allocated from this node to the user. 
This will @@ -213,7 +210,7 @@ typedef int (*orte_ras_base_module_node_query_alloc_fn_t)(opal_list_t*, orte_job /* * Query the registry for a specific node */ -typedef orte_ras_node_t* (*orte_ras_base_module_node_lookup_fn_t)(orte_cellid_t, const char* nodename); +typedef orte_ras_node_t* (*orte_ras_base_module_node_lookup_fn_t)(const char* nodename); /** * ras module version 1.3.0 diff --git a/orte/mca/ras/ras_types.h b/orte/mca/ras/ras_types.h index c746a8f705..cc03e1b6b8 100644 --- a/orte/mca/ras/ras_types.h +++ b/orte/mca/ras/ras_types.h @@ -51,8 +51,6 @@ struct orte_ras_node_t { /** String of the architecture for the node. This is permitted to be NULL if it is not known. */ char *node_arch; - /** The cell ID of this node */ - orte_cellid_t node_cellid; /** State of this node; see include/orte_types.h */ orte_node_state_t node_state; /** A "soft" limit on the number of slots available on the node. diff --git a/orte/mca/ras/slurm/ras_slurm_module.c b/orte/mca/ras/slurm/ras_slurm_module.c index 4f4ac9caf4..fcb128e825 100644 --- a/orte/mca/ras/slurm/ras_slurm_module.c +++ b/orte/mca/ras/slurm/ras_slurm_module.c @@ -340,9 +340,6 @@ static int orte_ras_slurm_discover(char *regexp, char *tasks_per_node, node->node_name = strdup(names[i]); node->node_arch = NULL; node->node_state = ORTE_NODE_STATE_UP; - /* JMS: this should not be hard-wired to 0, but there's no - other value to put it to [yet]... 
*/ - node->node_cellid = 0; node->node_slots_inuse = 0; node->node_slots_max = 0; node->node_slots = slots[i]; diff --git a/orte/mca/ras/tm/ras_tm_module.c b/orte/mca/ras/tm/ras_tm_module.c index 3f3d26f519..8b012eb281 100644 --- a/orte/mca/ras/tm/ras_tm_module.c +++ b/orte/mca/ras/tm/ras_tm_module.c @@ -214,7 +214,6 @@ static int discover(opal_list_t* nodelist, char *pbs_jobid) node = OBJ_NEW(orte_ras_node_t); node->node_name = hostname; node->launch_id = nodeid; - node->node_cellid = 0; node->node_slots_inuse = 0; node->node_slots_max = 0; node->node_slots = 1; diff --git a/orte/mca/ras/xgrid/src/ras_xgrid_module.c b/orte/mca/ras/xgrid/src/ras_xgrid_module.c index 3d1d168aef..2278ad3e5d 100644 --- a/orte/mca/ras/xgrid/src/ras_xgrid_module.c +++ b/orte/mca/ras/xgrid/src/ras_xgrid_module.c @@ -143,7 +143,6 @@ static int discover(orte_jobid_t jobid, opal_list_t* nodelist) node->node_name = hostname; node->node_arch = NULL; node->node_state = ORTE_NODE_STATE_UP; - node->node_cellid = 0; node->node_slots_inuse = 0; node->node_slots_max = 0; node->node_slots = 1; diff --git a/orte/mca/rds/base/rds_base_open.c b/orte/mca/rds/base/rds_base_open.c index f8a33d3052..0492d0938c 100644 --- a/orte/mca/rds/base/rds_base_open.c +++ b/orte/mca/rds/base/rds_base_open.c @@ -45,48 +45,6 @@ * Local functions. 
*/ -static void orte_rds_base_cell_desc_constructor(orte_rds_cell_desc_t *cell) -{ - cell->site = NULL; - cell->name = NULL; - cell->type = NULL; - - OBJ_CONSTRUCT(&cell->attributes, opal_list_t); -} - -static void orte_rds_base_cell_desc_destructor(orte_rds_cell_desc_t *cell) -{ - if (NULL != cell->site) free(cell->site); - if (NULL != cell->name) free(cell->name); - if (NULL != cell->type) free(cell->type); - - OBJ_DESTRUCT(&cell->attributes); -} - -OBJ_CLASS_INSTANCE( - orte_rds_cell_desc_t, - opal_list_item_t, - orte_rds_base_cell_desc_constructor, - orte_rds_base_cell_desc_destructor); - - -static void orte_rds_base_cell_attr_constructor(orte_rds_cell_attr_t *cell) -{ - OBJ_CONSTRUCT(&cell->keyval, orte_gpr_keyval_t); -} - -static void orte_rds_base_cell_attr_destructor(orte_rds_cell_attr_t *cell) -{ - OBJ_DESTRUCT(&cell->keyval); -} - -OBJ_CLASS_INSTANCE( - orte_rds_cell_attr_t, - opal_list_item_t, - orte_rds_base_cell_attr_constructor, - orte_rds_base_cell_attr_destructor); - - /* * Global variables */ diff --git a/orte/mca/rds/base/rds_base_registry_fns.c b/orte/mca/rds/base/rds_base_registry_fns.c index 2eda300d70..c2c442d0c5 100644 --- a/orte/mca/rds/base/rds_base_registry_fns.c +++ b/orte/mca/rds/base/rds_base_registry_fns.c @@ -38,6 +38,7 @@ int orte_rds_base_store_resource(opal_list_t *resources) { +#if 0 orte_rds_cell_desc_t *cell; opal_list_item_t *item; orte_gpr_value_t **values; @@ -106,4 +107,6 @@ CLEANUP: if (NULL != values) free(values); return rc; +#endif + return ORTE_SUCCESS; } diff --git a/orte/mca/rds/hostfile/rds_hostfile.c b/orte/mca/rds/hostfile/rds_hostfile.c index 3d5e4e9e89..0d24871cc1 100644 --- a/orte/mca/rds/hostfile/rds_hostfile.c +++ b/orte/mca/rds/hostfile/rds_hostfile.c @@ -45,8 +45,6 @@ #include "orte/mca/rds/hostfile/rds_hostfile_lex.h" static bool orte_rds_hostfile_queried = false; -static orte_cellid_t local_cellid; -static bool need_cellid = true; static char *cur_hostfile_name = NULL; static void 
orte_rds_hostfile_parse_error(int token) @@ -207,7 +205,7 @@ static int orte_rds_hostfile_parse_line(int token, opal_list_t* existing, opal_l username = strdup(argv[0]); node_name = strdup(argv[1]); } else { - opal_output(0, "WARNING: Unhandeled user@host-combination\n"); /* XXX */ + opal_output(0, "WARNING: Unhandled user@host-combination\n"); /* XXX */ } opal_argv_free (argv); @@ -231,22 +229,6 @@ static int orte_rds_hostfile_parse_line(int token, opal_list_t* existing, opal_l node->node_name = node_name; node->node_username = username; node->node_slots = 0; - -#if 0 - /* get a new cellid for this node */ - /* JMS Temporarily turned off until cell IDs are - properly handled elsewhere in the code */ - /* JJH This assumes that each hostname listed should be - placed in a new cell. Is this accurate to the design? - */ - if (ORTE_SUCCESS != - (rc = orte_ns.create_cellid(&(node->node_cellid), - "UNKNOWN-SITE", - node->node_name))) { - ORTE_ERROR_LOG(rc); - return rc; - } -#endif } /* Note that we need to set update to true regardless of @@ -259,12 +241,6 @@ static int orte_rds_hostfile_parse_line(int token, opal_list_t* existing, opal_l update = true; } - else { - /* If it was in the existing list, then we can use its cellid - * to add the reset of the hosts in the file to. 
*/ - local_cellid = node->node_cellid; - need_cellid = false; - } } else { orte_rds_hostfile_parse_error(token); return ORTE_ERROR; @@ -477,11 +453,8 @@ unlock: static int orte_rds_hostfile_query(orte_jobid_t job) { opal_list_t existing; - opal_list_t updates, rds_updates, procs; + opal_list_t updates, procs; opal_list_item_t *item; - orte_rds_cell_desc_t *rds_item; - orte_rds_cell_attr_t *new_attr; - orte_ras_node_t *ras_item; int rc; if (orte_rds_hostfile_queried) { @@ -499,7 +472,6 @@ static int orte_rds_hostfile_query(orte_jobid_t job) OBJ_CONSTRUCT(&existing, opal_list_t); OBJ_CONSTRUCT(&updates, opal_list_t); - OBJ_CONSTRUCT(&rds_updates, opal_list_t); OBJ_CONSTRUCT(&procs, opal_list_t); rc = orte_ras_base_node_query(&existing); if(ORTE_SUCCESS != rc) { @@ -525,78 +497,7 @@ static int orte_rds_hostfile_query(orte_jobid_t job) if ( !opal_list_is_empty(&updates) ) { - /* Convert RAS update list to RDS update list */ - for ( ras_item = (orte_ras_node_t*)opal_list_get_first(&updates); - ras_item != (orte_ras_node_t*)opal_list_get_end(&updates); - ras_item = (orte_ras_node_t*)opal_list_get_next(ras_item)) { - - rds_item = OBJ_NEW(orte_rds_cell_desc_t); - if (NULL == rds_item) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - rds_item->site = strdup("Hostfile"); - rds_item->name = strdup(ras_item->node_name); - if (need_cellid) { - /* need to store the info for this hostfile so the NS can get it - * later when requested - */ - local_cellid = 0; - rc = orte_ns.create_cellid(&local_cellid, rds_item->site, rds_item->name); - if (ORTE_SUCCESS != rc) { - ORTE_ERROR_LOG(rc); - return rc; - } - need_cellid = false; - } - - rds_item->cellid = local_cellid; - ras_item->node_cellid = local_cellid; - - new_attr = OBJ_NEW(orte_rds_cell_attr_t); - if (NULL == new_attr) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - new_attr->keyval.key = strdup(ORTE_RDS_NAME); - new_attr->keyval.value = 
OBJ_NEW(orte_data_value_t); - if (NULL == new_attr->keyval.value) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - new_attr->keyval.value->type = ORTE_STRING; - new_attr->keyval.value->data = strdup(ras_item->node_name); - opal_list_append(&(rds_item->attributes), &new_attr->super); - - new_attr = OBJ_NEW(orte_rds_cell_attr_t); - if (NULL == new_attr) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - new_attr->keyval.key = strdup(ORTE_CELLID_KEY); - new_attr->keyval.value = OBJ_NEW(orte_data_value_t); - if (NULL == new_attr->keyval.value) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - new_attr->keyval.value->type = ORTE_CELLID; - if (ORTE_SUCCESS != (rc = orte_dss.copy(&(new_attr->keyval.value->data), &(rds_item->cellid), ORTE_CELLID))) { - ORTE_ERROR_LOG(rc); - return rc; - } - opal_list_append(&(rds_item->attributes), &new_attr->super); - - opal_list_append(&rds_updates, &rds_item->super); - } - - /* Insert the new node into the RDS */ - rc = orte_rds.store_resource(&rds_updates); - if (ORTE_SUCCESS != rc) { - goto cleanup; - } - - /* Then the RAS, since we can assume that any + /* Insert the results into the RAS, since we can assume that any * resources listed in the hostfile have been * already allocated for our use. 
*/ @@ -618,7 +519,7 @@ static int orte_rds_hostfile_query(orte_jobid_t job) } } if ( !opal_list_is_empty(&procs) ) { - rc = orte_ras_base_proc_insert(&procs, local_cellid, job); + rc = orte_ras_base_proc_insert(&procs, job); if (ORTE_SUCCESS != rc) { goto cleanup; } @@ -641,17 +542,9 @@ cleanup: OBJ_RELEASE(item); } - while (NULL != (rds_item = (orte_rds_cell_desc_t*)opal_list_remove_first(&rds_updates))) { - while (NULL != (new_attr = (orte_rds_cell_attr_t*)opal_list_remove_first(&(rds_item->attributes)))) { - OBJ_RELEASE(new_attr); - } - OBJ_RELEASE(rds_item); - } - OBJ_DESTRUCT(&existing); OBJ_DESTRUCT(&updates); OBJ_DESTRUCT(&procs); - OBJ_DESTRUCT(&rds_updates); return rc; } diff --git a/orte/mca/rds/rds.h b/orte/mca/rds/rds.h index 84c444d59e..2e1078c308 100644 --- a/orte/mca/rds/rds.h +++ b/orte/mca/rds/rds.h @@ -61,23 +61,6 @@ * NOTE: It is the responsibility of each component to define unique MCA * parameters for passing their required input information. * - * The information developed by the RDS is placed on the ORTE_RESOURCE_SEGMENT of the - * registry. To ensure proper operation, the information MUST be described by the - * following tokens: - * - * - site name: the name of the site wherein the resource resides (e.g., "lanl") - * - * - resource name: the name of the specific resource being described (e.g., "pink") - * - * - cellid: a unique id for this resource (termed a "cell") that is generated by - * the ORTE name services (NS) subsystem. - * - * Keywords to describe the individual data elements within the resource are mostly - * defined in the rds_types.h - * file. However, some higher level types (e.g., nodename) may be defined in the - * include/orte_names.h - * file as they need to be generally understandable across multiple subsystems. - * * The resource allocation subsystem (RAS) is the primary consumer of this information, * although other subsystems can also access it. 
The information is typically read at * the start of any application, but can also be accessed dynamically when an application diff --git a/orte/mca/rds/rds_types.h b/orte/mca/rds/rds_types.h index 23b0fea4ec..d3198952fe 100644 --- a/orte/mca/rds/rds_types.h +++ b/orte/mca/rds/rds_types.h @@ -29,35 +29,6 @@ extern "C" { #endif -/* resource descriptor object */ -typedef struct { - /** Base object */ - opal_list_item_t super; - /** id of cell in which this resource resides */ - orte_cellid_t cellid; - /** string name of the site */ - char *site; - /** string name of the resource */ - char *name; - /** string type of the resource */ - char *type; - /** list of attributes */ - opal_list_t attributes; -} orte_rds_cell_desc_t; - -ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_rds_cell_desc_t); - -/* resource attribute object */ -typedef struct { - /** Base object */ - opal_list_item_t super; - /** key-value pair describing attribute */ - orte_gpr_keyval_t keyval; -} orte_rds_cell_attr_t; - -ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_rds_cell_attr_t); - - /* name of resource */ #define ORTE_RDS_NAME "orte-rds-name" diff --git a/orte/mca/rds/resfile/Makefile.am b/orte/mca/rds/resfile/Makefile.am deleted file mode 100644 index 647df0df1c..0000000000 --- a/orte/mca/rds/resfile/Makefile.am +++ /dev/null @@ -1,52 +0,0 @@ -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. 
-# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# Use the top-level Makefile.options - - - -# Make the output library in this directory, and name it either -# mca__.la (for DSO builds) or libmca__.la -# (for static builds). - -if OMPI_BUILD_rds_resfile_DSO -component_noinst = -component_install = mca_rds_resfile.la -else -component_noinst = libmca_rds_resfile.la -component_install = -endif - -proxy_SOURCES = \ - rds_resfile.c \ - rds_resfile_parse_attributes.c \ - rds_resfile.h \ - rds_resfile_component.c - -mcacomponentdir = $(pkglibdir) -mcacomponent_LTLIBRARIES = $(component_install) -mca_rds_resfile_la_SOURCES = $(proxy_SOURCES) -mca_rds_resfile_la_LIBADD = \ - $(top_ompi_builddir)/orte/libopen-rte.la \ - $(top_ompi_builddir)/opal/libopen-pal.la -mca_rds_resfile_la_LDFLAGS = -module -avoid-version - -noinst_LTLIBRARIES = $(component_noinst) -libmca_rds_resfile_la_SOURCES = $(proxy_SOURCES) -libmca_rds_resfile_la_LIBADD = -libmca_rds_resfile_la_LDFLAGS = -module -avoid-version diff --git a/orte/mca/rds/resfile/configure.params b/orte/mca/rds/resfile/configure.params deleted file mode 100644 index 3513f8d956..0000000000 --- a/orte/mca/rds/resfile/configure.params +++ /dev/null @@ -1,24 +0,0 @@ -# -*- shell-script -*- -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# Copyright (c) 2007 Los Alamos National Security, LLC. All rights -# reserved. 
-# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -# Specific to this module - -PARAM_CONFIG_FILES="Makefile" diff --git a/orte/mca/rds/resfile/rds_resfile.c b/orte/mca/rds/resfile/rds_resfile.c deleted file mode 100644 index 099443e385..0000000000 --- a/orte/mca/rds/resfile/rds_resfile.c +++ /dev/null @@ -1,357 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2007 Los Alamos National Security, LLC. All rights - * reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -#include "orte_config.h" - -#include - -#include "orte/orte_constants.h" -#include "opal/mca/base/mca_base_param.h" -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/ns/ns.h" - -#include "orte/mca/rds/base/rds_private.h" -#include "orte/mca/rds/resfile/rds_resfile.h" - -#define ORTE_RDS_RESFILE_MAX_LINE_LENGTH 512 - -static opal_list_t orte_rds_resfile_resource_list; - -static int orte_rds_resfile_parse_site(char *site, FILE *fp); - -static int orte_rds_resfile_parse_resource(orte_rds_cell_desc_t *cell, FILE *fp); - -static int orte_rds_resfile_parse_resource(orte_rds_cell_desc_t *cell, FILE *fp) -{ - char *line; - bool name_given = false; - int rc; - orte_rds_cell_attr_t *na; - - while (NULL != (line = orte_rds_resfile_getline(fp))) { - /* check for end of resource description */ - if (0 == strncmp(line, "name = orte_rds_resfile_parse_field(line))) { - ORTE_ERROR_LOG(ORTE_ERR_FILE_READ_FAILURE); - return ORTE_ERR_FILE_READ_FAILURE; - } - /* get new 
cellid for this site/resource */ - if (ORTE_SUCCESS != (rc = orte_ns.create_cellid(&(cell->cellid), cell->site, cell->name))) { - ORTE_ERROR_LOG(rc); - return rc; - } - na = OBJ_NEW(orte_rds_cell_attr_t); - if (NULL == na) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - na->keyval.key = strdup(ORTE_RDS_NAME); - na->keyval.value = OBJ_NEW(orte_data_value_t); - if (NULL == na->keyval.value) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - na->keyval.value->type = ORTE_STRING; - na->keyval.value->data = strdup(cell->name); - opal_list_append(&(cell->attributes), &na->super); - - na = OBJ_NEW(orte_rds_cell_attr_t); - if (NULL == na) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - na->keyval.key = strdup(ORTE_CELLID_KEY); - na->keyval.value = OBJ_NEW(orte_data_value_t); - if (NULL == na->keyval.value) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - na->keyval.value->type = ORTE_CELLID; - if (ORTE_SUCCESS != (rc = orte_dss.copy(&(na->keyval.value->data), &(cell->cellid), ORTE_CELLID))) { - ORTE_ERROR_LOG(rc); - return rc; - } - opal_list_append(&(cell->attributes), &na->super); - - } else if (0 == strncmp(line, "type = orte_rds_resfile_parse_field(line))) { - ORTE_ERROR_LOG(ORTE_ERR_FILE_READ_FAILURE); - return ORTE_ERR_FILE_READ_FAILURE; - } - na = OBJ_NEW(orte_rds_cell_attr_t); - if (NULL == na) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - na->keyval.key = strdup(ORTE_RDS_TYPE); - na->keyval.value = OBJ_NEW(orte_data_value_t); - if (NULL == na->keyval.value) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - na->keyval.value->type = ORTE_STRING; - na->keyval.value->data = strdup(cell->type); - opal_list_append(&(cell->attributes), &na->super); - - } else if (0 == strncmp(line, "keyval.key = strdup(ORTE_RDS_ALLOCATOR); - na->keyval.value = 
OBJ_NEW(orte_data_value_t); - if (NULL == na->keyval.value) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - na->keyval.value->type = ORTE_STRING; - if (NULL == (na->keyval.value->data = orte_rds_resfile_parse_field(line))) { - ORTE_ERROR_LOG(ORTE_ERR_FILE_READ_FAILURE); - return ORTE_ERR_FILE_READ_FAILURE; - } - opal_list_append(&(cell->attributes), &na->super); - - } else if (0 == strncmp(line, "keyval.key = strdup(ORTE_RDS_LAUNCHER); - na->keyval.value = OBJ_NEW(orte_data_value_t); - if (NULL == na->keyval.value) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - na->keyval.value->type = ORTE_STRING; - if (NULL == (na->keyval.value->data = orte_rds_resfile_parse_field(line))) { - ORTE_ERROR_LOG(ORTE_ERR_FILE_READ_FAILURE); - return ORTE_ERR_FILE_READ_FAILURE; - } - opal_list_append(&(cell->attributes), &na->super); - - } else if (0 == strncmp(line, "site = strdup(site); - /* parse the resource description */ - if (ORTE_SUCCESS != (rc = orte_rds_resfile_parse_resource(cell, fp))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - opal_list_append(&orte_rds_resfile_resource_list, &cell->super); - } - } - return ORTE_SUCCESS; -} - - -int orte_rds_resfile_query(orte_jobid_t job) -{ - int fileid, rc; - FILE *fp; - char *input_line, *site; - - if (orte_rds_resfile_queried) { - /* if we have previously been queried, then our info - * is already on the registry, so just return - */ - return ORTE_SUCCESS; - } - - OPAL_THREAD_LOCK(&mca_rds_resfile_component.lock); - - orte_rds_resfile_queried = true; - - /* get the resource filename */ - fileid = mca_base_param_find("rds", "resfile", "name"); - mca_base_param_lookup_string(fileid, &mca_rds_resfile_component.filename); - - if (NULL == mca_rds_resfile_component.filename) { /* no resource file provided */ - /* DO NOT ORTE_ERROR_LOG OR RETURN AN ERROR - THIS IS NOT AN ERROR CONDITION */ - OPAL_THREAD_UNLOCK(&mca_rds_resfile_component.lock); - return 
ORTE_SUCCESS; - } - - /* open the resource file */ - fp = fopen(mca_rds_resfile_component.filename, "r"); - if (NULL == fp) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - OPAL_THREAD_UNLOCK(&mca_rds_resfile_component.lock); - return ORTE_ERR_NOT_FOUND; - } - - /* setup the resource list */ - OBJ_CONSTRUCT(&orte_rds_resfile_resource_list, opal_list_t); - - /* dump the initial line containing the DOM */ - input_line = orte_rds_resfile_getline(fp); - if (NULL == input_line || 0 != strncmp(input_line, "", strlen(""))) { - /* extract the site name from input_line */ - if (NULL == (site = orte_rds_resfile_parse_field(input_line))) { - ORTE_ERROR_LOG(ORTE_ERR_FILE_READ_FAILURE); - free(input_line); - rc = ORTE_ERR_FILE_READ_FAILURE; - goto CLEANUP; - } - if (ORTE_SUCCESS != (rc = orte_rds_resfile_parse_site(site, fp))) { - ORTE_ERROR_LOG(rc); - free(input_line); - goto CLEANUP; - } - } - free(input_line); - } - - /* place resource list on registry */ - if (ORTE_SUCCESS != (rc = orte_rds_base_store_resource(&orte_rds_resfile_resource_list))) { - ORTE_ERROR_LOG(rc); - return rc; - } - -CLEANUP: - fclose(fp); - OBJ_DESTRUCT(&orte_rds_resfile_resource_list); - - OPAL_THREAD_UNLOCK(&mca_rds_resfile_component.lock); - - return ORTE_SUCCESS; -} - - -char *orte_rds_resfile_getline(FILE *fp) -{ - int i; - char *ret, *buff, *start; - char input[ORTE_RDS_RESFILE_MAX_LINE_LENGTH]; - - /* find the next non-blank line, stop at end-of-file */ - ret = fgets(input, ORTE_RDS_RESFILE_MAX_LINE_LENGTH, fp); - while (NULL != ret) { - input[strlen(input)-1] = '\0'; /* remove newline */ - /* strip leading whitespace */ - for (i=0; i < (int)strlen(input) && (input[i] == ' ' || input[i] == '\t'); i++); - if (i < (int)strlen(input)) { - start = &input[i]; - buff = strdup(start); - return buff; - } - ret = fgets(input, ORTE_RDS_RESFILE_MAX_LINE_LENGTH, fp); - } - - return NULL; -} - - -char *orte_rds_resfile_parse_field(char *input) -{ - char *start, *stop, *ans; - - start = strchr(input, '>'); - if 
(NULL == start) { - ORTE_ERROR_LOG(ORTE_ERR_FILE_READ_FAILURE); - return NULL; - } - start++; - stop = strchr(start, '<'); - if (NULL == stop) { - ORTE_ERROR_LOG(ORTE_ERR_FILE_READ_FAILURE); - return NULL; - } - *stop = '\0'; /* terminate the string */ - - if (start == stop) { - return NULL; - } - - ans = strdup(start); - return ans; -} diff --git a/orte/mca/rds/resfile/rds_resfile.h b/orte/mca/rds/resfile/rds_resfile.h deleted file mode 100644 index 7e5931f210..0000000000 --- a/orte/mca/rds/resfile/rds_resfile.h +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** - * @file - * - * Resource Discovery (Hostfile) - */ -#ifndef ORTE_RDS_RESFILE_H -#define ORTE_RDS_RESFILE_H - -#include "orte_config.h" - -#include "opal/threads/mutex.h" - -#include "orte/mca/rds/base/base.h" - -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif - -/* - * RDS Resource file functions - */ -int orte_rds_resfile_query(orte_jobid_t job); - -int orte_rds_resfile_finalize(void); - -/* RDS resource file internal functions */ -char *orte_rds_resfile_getline(FILE *fp); - -char *orte_rds_resfile_parse_field(char *input); - -/* RDS resource file attribute parsers */ -int orte_rds_resfile_parse_fe(orte_rds_cell_desc_t *cell, FILE *fp); - -int orte_rds_resfile_parse_cd(orte_rds_cell_desc_t *cell, FILE *fp); - -int orte_rds_resfile_parse_os(orte_rds_cell_desc_t *cell, FILE *fp); - -int orte_rds_resfile_parse_fs(orte_rds_cell_desc_t *cell, FILE *fp); - -int orte_rds_resfile_parse_se(orte_rds_cell_desc_t *cell, FILE *fp); - -int orte_rds_resfile_parse_na(orte_rds_cell_desc_t *cell, FILE *fp); - -/** - * RDS Resource file Component - */ -struct orte_rds_resfile_component_t { - orte_rds_base_component_t super; - int debug; - char *filename; - opal_mutex_t lock; -}; -typedef struct orte_rds_resfile_component_t orte_rds_resfile_component_t; - -ORTE_MODULE_DECLSPEC extern orte_rds_resfile_component_t mca_rds_resfile_component; -extern orte_rds_base_module_t orte_rds_resfile_module; - -extern bool orte_rds_resfile_queried; - -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif -#endif diff --git a/orte/mca/rds/resfile/rds_resfile_component.c b/orte/mca/rds/resfile/rds_resfile_component.c deleted file mode 100644 index adbc9d7a0b..0000000000 --- a/orte/mca/rds/resfile/rds_resfile_component.c +++ /dev/null @@ -1,131 +0,0 @@ -/* - * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. 
All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" -#include "orte/orte_constants.h" - -#include "opal/mca/base/base.h" -#include "opal/mca/base/mca_base_param.h" -#include "opal/util/output.h" - -#include "orte/util/proc_info.h" -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/ns/ns_types.h" - -#include "orte/mca/rds/base/rds_private.h" -#include "orte/mca/rds/resfile/rds_resfile.h" - -/* - * Local functions - */ - -static int orte_rds_resfile_open(void); -static int orte_rds_resfile_close(void); -static orte_rds_base_module_t* orte_rds_resfile_init(void); - -orte_rds_resfile_component_t mca_rds_resfile_component = { - { - /* First, the mca_base_component_t struct containing meta - information about the component itself */ - - { - /* Indicate that we are a iof v1.0.0 component (which also - implies a specific MCA version) */ - - ORTE_RDS_BASE_VERSION_1_3_0, - - "resfile", /* MCA component name */ - ORTE_MAJOR_VERSION, /* MCA component major version */ - ORTE_MINOR_VERSION, /* MCA component minor version */ - ORTE_RELEASE_VERSION, /* MCA component release version */ - orte_rds_resfile_open, /* component open */ - orte_rds_resfile_close /* component close */ - }, - - /* Next the MCA v1.0.0 component meta data */ - { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT - }, - - orte_rds_resfile_init, - orte_rds_resfile_finalize - } -}; - - -orte_rds_base_module_t orte_rds_resfile_module = { - orte_rds_resfile_query, - orte_rds_base_store_resource, -}; - -/* - * Instantiate component variables - */ -bool 
orte_rds_resfile_queried; - -/** - * component open/close/init function - */ -static int orte_rds_resfile_open(void) -{ - OBJ_CONSTRUCT(&mca_rds_resfile_component.lock, opal_mutex_t); - - mca_base_param_reg_int(&mca_rds_resfile_component.super.rds_version, "debug", - "Toggle debug output for resfile RDS component", - false, false, (int)false, - &mca_rds_resfile_component.debug); - mca_base_param_reg_string(&mca_rds_resfile_component.super.rds_version, "name", - "ORTE Resource filename", - false, false, NULL, - &mca_rds_resfile_component.filename); - - orte_rds_resfile_queried = false; - - return ORTE_SUCCESS; -} - - -static orte_rds_base_module_t *orte_rds_resfile_init(void) -{ - /* if we are not an HNP, then don't select us */ - if (!orte_process_info.seed) { - return NULL; - } - - /* if we are an HNP, then volunteer */ - OBJ_DESTRUCT(&mca_rds_resfile_component.lock); - - return &orte_rds_resfile_module; -} - -int orte_rds_resfile_finalize(void) -{ - return ORTE_SUCCESS; -} - -/** - * Close all subsystems. - */ - -static int orte_rds_resfile_close(void) -{ - return ORTE_SUCCESS; -} - - diff --git a/orte/mca/rds/resfile/rds_resfile_parse_attributes.c b/orte/mca/rds/resfile/rds_resfile_parse_attributes.c deleted file mode 100644 index f31161be24..0000000000 --- a/orte/mca/rds/resfile/rds_resfile_parse_attributes.c +++ /dev/null @@ -1,341 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -#include "orte_config.h" - -#include -#include - -#include "opal/util/output.h" - -#include "orte/dss/dss.h" -#include "orte/orte_constants.h" -#include "orte/mca/errmgr/errmgr.h" - -#include "orte/mca/rds/resfile/rds_resfile.h" - -int orte_rds_resfile_parse_fe(orte_rds_cell_desc_t *cell, FILE *fp) -{ - char *line, *ssh; - orte_rds_cell_attr_t *na; - bool tf_flag; - int rc; - - while (NULL != (line = orte_rds_resfile_getline(fp))) { - if (0 == strncmp(line, "keyval.value = OBJ_NEW(orte_data_value_t); - if (NULL == na->keyval.value) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - if (0 == strncmp(line, "keyval.key = strdup(ORTE_RDS_FE_NAME); - na->keyval.value->type = ORTE_STRING; - if (NULL == (na->keyval.value->data = orte_rds_resfile_parse_field(line))) { - ORTE_ERROR_LOG(ORTE_ERR_FILE_READ_FAILURE); - return ORTE_ERR_FILE_READ_FAILURE; - } - } else if (0 == strncmp(line, "keyval.key = strdup(ORTE_RDS_FE_TMP); - na->keyval.value->type = ORTE_STRING; - if (NULL == (na->keyval.value->data = orte_rds_resfile_parse_field(line))) { - ORTE_ERROR_LOG(ORTE_ERR_FILE_READ_FAILURE); - return ORTE_ERR_FILE_READ_FAILURE; - } - } else if (0 == strncmp(line, "keyval.key = strdup(ORTE_RDS_FE_SSH); - na->keyval.value->type = ORTE_BOOL; - if (NULL == (ssh = orte_rds_resfile_parse_field(line))) { - ORTE_ERROR_LOG(ORTE_ERR_FILE_READ_FAILURE); - return ORTE_ERR_FILE_READ_FAILURE; - } - if (0 == strncmp(ssh, "true", 4)) { - tf_flag = true; - } else { - tf_flag = false; - } - if (ORTE_SUCCESS != (rc = orte_dss.copy(&(na->keyval.value->data), &tf_flag, ORTE_BOOL))) { - ORTE_ERROR_LOG(rc); - return rc; - } - } else { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - return ORTE_ERR_BAD_PARAM; - } - opal_list_append(&(cell->attributes), &na->super); - } - - return ORTE_SUCCESS; -} - -int orte_rds_resfile_parse_cd(orte_rds_cell_desc_t *cell, FILE *fp) -{ - char *line, *tmp; - 
orte_rds_cell_attr_t *na; - int16_t dummy; - int rc; - - while (NULL != (line = orte_rds_resfile_getline(fp))) { - if (0 == strncmp(line, "keyval.value = OBJ_NEW(orte_data_value_t); - if (NULL == na->keyval.value) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - if (0 == strncmp(line, "keyval.key = strdup(ORTE_RDS_COMP_NUM_DOMAINS); - na->keyval.value->type = ORTE_INT16; - if (NULL == (tmp = orte_rds_resfile_parse_field(line))) { - ORTE_ERROR_LOG(ORTE_ERR_FILE_READ_FAILURE); - free(line); - return ORTE_ERR_FILE_READ_FAILURE; - } - dummy = 13; /* strtoi(tmp); */ - free(tmp); - if (ORTE_SUCCESS != (rc = orte_dss.copy(&(na->keyval.value->data), &dummy, ORTE_INT16))) { - ORTE_ERROR_LOG(rc); - return rc; - } - } else if (0 == strncmp(line, "keyval.key = strdup(ORTE_RDS_COMP_NODES_DOMAIN); - na->keyval.value->type = ORTE_INT16; - if (NULL == (tmp = orte_rds_resfile_parse_field(line))) { - ORTE_ERROR_LOG(ORTE_ERR_FILE_READ_FAILURE); - free(line); - return ORTE_ERR_FILE_READ_FAILURE; - } - dummy = 13; /* strtoi(tmp); */ - free(tmp); - if (ORTE_SUCCESS != (rc = orte_dss.copy(&(na->keyval.value->data), &dummy, ORTE_INT16))) { - ORTE_ERROR_LOG(rc); - return rc; - } - } else { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - free(line); - return ORTE_ERR_BAD_PARAM; - } - opal_list_append(&(cell->attributes), &na->super); - free(line); - } - - return ORTE_SUCCESS; -} - -int orte_rds_resfile_parse_os(orte_rds_cell_desc_t *cell, FILE *fp) -{ - char *line; - orte_rds_cell_attr_t *na; - - while (NULL != (line = orte_rds_resfile_getline(fp))) { - if (0 == strncmp(line, "keyval.value = OBJ_NEW(orte_data_value_t); - if (NULL == na->keyval.value) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - if (0 == strncmp(line, "keyval.key = strdup(ORTE_RDS_OS_TYPE); - na->keyval.value->type = ORTE_STRING; - if (NULL == (na->keyval.value->data = orte_rds_resfile_parse_field(line))) { - ORTE_ERROR_LOG(ORTE_ERR_FILE_READ_FAILURE); - 
free(line); - return ORTE_ERR_FILE_READ_FAILURE; - } - } else if (0 == strncmp(line, "keyval.key = strdup(ORTE_RDS_OS_VENDOR); - na->keyval.value->type = ORTE_STRING; - if (NULL == (na->keyval.value->data = orte_rds_resfile_parse_field(line))) { - ORTE_ERROR_LOG(ORTE_ERR_FILE_READ_FAILURE); - free(line); - return ORTE_ERR_FILE_READ_FAILURE; - } - } else if (0 == strncmp(line, "keyval.key = strdup(ORTE_RDS_OS_VERSION); - na->keyval.value->type = ORTE_STRING; - if (NULL == (na->keyval.value->data = orte_rds_resfile_parse_field(line))) { - ORTE_ERROR_LOG(ORTE_ERR_FILE_READ_FAILURE); - free(line); - return ORTE_ERR_FILE_READ_FAILURE; - } - } else { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - free(line); - return ORTE_ERR_BAD_PARAM; - } - opal_list_append(&(cell->attributes), &na->super); - free(line); - } - - return ORTE_SUCCESS; -} - -int orte_rds_resfile_parse_fs(orte_rds_cell_desc_t *cell, FILE *fp) -{ - char *line; -/* orte_rds_cell_attr_t *na; */ - - while (NULL != (line = orte_rds_resfile_getline(fp))) { - if (0 == strncmp(line, "keyval.key = strdup(ORTE_RDS_FS_TYPE); - na->keyval.type = ORTE_STRING; - if (NULL == (na->keyval.value.strptr = orte_rds_resfile_parse_field(line))) { - ORTE_ERROR_LOG(ORTE_ERR_FILE_READ_FAILURE); - free(line); - return ORTE_ERR_FILE_READ_FAILURE; - } - } else if (0 == strncmp(line, "keyval.key = strdup(ORTE_RDS_FS_HOME); - na->keyval.type = ORTE_STRING; - if (NULL == (na->keyval.value.strptr = orte_rds_resfile_parse_field(line))) { - ORTE_ERROR_LOG(ORTE_ERR_FILE_READ_FAILURE); - free(line); - return ORTE_ERR_FILE_READ_FAILURE; - } - } else if (0 == strncmp(line, "keyval.key = strdup(ORTE_RDS_FS_SCRATCH); - na->keyval.type = ORTE_STRING; - if (NULL == (na->keyval.value.strptr = orte_rds_resfile_parse_field(line))) { - ORTE_ERROR_LOG(ORTE_ERR_FILE_READ_FAILURE); - free(line); - return ORTE_ERR_FILE_READ_FAILURE; - } else if (0 == strncmp(line, "keyval.key = strdup(ORTE_RDS_FS_DOMAINS); - na->keyval.type = ORTE_INT16; - if (NULL == (tmp = 
orte_rds_resfile_parse_field(line))) { - ORTE_ERROR_LOG(ORTE_ERR_FILE_READ_FAILURE); - free(line); - return ORTE_ERR_FILE_READ_FAILURE; - } - na->keyval.value.i16 = 13; /* strtoi(tmp); */ - } else if (0 == strncmp(line, "keyval.key = strdup(ORTE_RDS_FS_NODES_DOMAIN); - na->keyval.type = ORTE_INT16; - if (NULL == (tmp = orte_rds_resfile_parse_field(line))) { - ORTE_ERROR_LOG(ORTE_ERR_FILE_READ_FAILURE); - free(line); - return ORTE_ERR_FILE_READ_FAILURE; - } - na->keyval.value.i16 = 13; /*strtoi(tmp); */ - } else { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - free(line); - return ORTE_ERR_BAD_PARAM; - } - opal_list_append(&(cell->attributes), &na->super); -#endif - free(line); - } - - return ORTE_SUCCESS; -} - -int orte_rds_resfile_parse_se(orte_rds_cell_desc_t *cell, FILE *fp) -{ - char *line; - orte_rds_cell_attr_t *na; - char *tmp; - int16_t dummy; - int rc; - - while (NULL != (line = orte_rds_resfile_getline(fp))) { - if (0 == strncmp(line, "keyval.key = strdup(ORTE_RDS_ALLOCATION_SEQUENCE); - na->keyval.value = OBJ_NEW(orte_data_value_t); - if (NULL == na->keyval.value) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - na->keyval.value->type = ORTE_INT16; - if (NULL == (tmp = orte_rds_resfile_parse_field(line))) { - ORTE_ERROR_LOG(ORTE_ERR_FILE_READ_FAILURE); - free(line); - return ORTE_ERR_FILE_READ_FAILURE; - } - dummy = 13; /*strtoi(tmp); */ - free(tmp); - if (ORTE_SUCCESS != (rc = orte_dss.copy(&(na->keyval.value->data), &dummy, ORTE_INT16))) { - ORTE_ERROR_LOG(rc); - return rc; - } - free(line); - } - - return ORTE_SUCCESS; -} - -int orte_rds_resfile_parse_na(orte_rds_cell_desc_t *cell, FILE *fp) -{ - char *line; -/* orte_rds_cell_attr_t *na; */ - - while (NULL != (line = orte_rds_resfile_getline(fp))) { - if (0 == strncmp(line, "cell = src->cell; if (NULL != src->nodename) { (*dest)->nodename = strdup(src->nodename); diff --git a/orte/mca/rmaps/base/data_type_support/rmaps_data_type_packing_fns.c 
b/orte/mca/rmaps/base/data_type_support/rmaps_data_type_packing_fns.c index d7cf531911..31fe0557ac 100644 --- a/orte/mca/rmaps/base/data_type_support/rmaps_data_type_packing_fns.c +++ b/orte/mca/rmaps/base/data_type_support/rmaps_data_type_packing_fns.c @@ -198,13 +198,7 @@ int orte_rmaps_base_pack_mapped_node(orte_buffer_t *buffer, const void *src, nodes = (orte_mapped_node_t**) src; for (i=0; i < num_vals; i++) { - /* pack the cellid */ - if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, &(nodes[i]->cell), 1, ORTE_CELLID))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* pack the nodename */ + /* pack the nodename */ if (ORTE_SUCCESS != (rc = orte_dss_pack_buffer(buffer, &(nodes[i]->nodename), 1, ORTE_STRING))) { ORTE_ERROR_LOG(rc); return rc; diff --git a/orte/mca/rmaps/base/data_type_support/rmaps_data_type_print_fns.c b/orte/mca/rmaps/base/data_type_support/rmaps_data_type_print_fns.c index f81196159e..e61d7054a5 100644 --- a/orte/mca/rmaps/base/data_type_support/rmaps_data_type_print_fns.c +++ b/orte/mca/rmaps/base/data_type_support/rmaps_data_type_print_fns.c @@ -167,8 +167,8 @@ int orte_rmaps_base_print_mapped_node(char **output, char *prefix, orte_mapped_n asprintf(&pfx2, "%s", prefix); } - asprintf(&tmp, "%sMapped node:\n%s\tCell: %ld\tNodename: %s\tLaunch id: %ld\tUsername: %s\n%s\tDaemon name:", pfx2, pfx2, - (long)src->cell, (NULL == src->nodename ? "NULL" : src->nodename), (long)src->launch_id, + asprintf(&tmp, "%sMapped node:\n%s\tNodename: %s\tLaunch id: %ld\tUsername: %s\n%s\tDaemon name:", pfx2, pfx2, + (NULL == src->nodename ? "NULL" : src->nodename), (long)src->launch_id, (NULL == src->username ? 
"NULL" : src->username), pfx2); asprintf(&pfx, "%s\t", pfx2); diff --git a/orte/mca/rmaps/base/data_type_support/rmaps_data_type_unpacking_fns.c b/orte/mca/rmaps/base/data_type_support/rmaps_data_type_unpacking_fns.c index fdca96d9ba..c00b6194d1 100644 --- a/orte/mca/rmaps/base/data_type_support/rmaps_data_type_unpacking_fns.c +++ b/orte/mca/rmaps/base/data_type_support/rmaps_data_type_unpacking_fns.c @@ -245,14 +245,6 @@ int orte_rmaps_base_unpack_mapped_node(orte_buffer_t *buffer, void *dest, return ORTE_ERR_OUT_OF_RESOURCE; } - /* unpack the cellid */ - n = 1; - if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer, - &(nodes[i]->cell), &n, ORTE_CELLID))) { - ORTE_ERROR_LOG(rc); - return rc; - } - /* unpack the nodename */ n = 1; if (ORTE_SUCCESS != (rc = orte_dss_unpack_buffer(buffer, diff --git a/orte/mca/rmaps/base/rmaps_base_registry_fns.c b/orte/mca/rmaps/base/rmaps_base_registry_fns.c index ba7a713367..32f8a98b9a 100644 --- a/orte/mca/rmaps/base/rmaps_base_registry_fns.c +++ b/orte/mca/rmaps/base/rmaps_base_registry_fns.c @@ -45,7 +45,6 @@ int orte_rmaps_base_get_job_map(orte_job_map_t **map, orte_jobid_t jobid) orte_mapped_proc_t *proc; orte_mapped_node_t *node; opal_list_item_t *item; - orte_cellid_t *cellptr, cell=ORTE_CELLID_INVALID; orte_vpid_t *vptr; orte_std_cntr_t *sptr; bool *bptr, oversub=false; @@ -65,7 +64,6 @@ int orte_rmaps_base_get_job_map(orte_job_map_t **map, orte_jobid_t jobid) ORTE_PROC_NAME_KEY, ORTE_PROC_APP_CONTEXT_KEY, ORTE_PROC_LOCAL_PID_KEY, - ORTE_CELLID_KEY, ORTE_NODE_NAME_KEY, ORTE_NODE_LAUNCH_ID_KEY, ORTE_NODE_USERNAME_KEY, @@ -245,14 +243,6 @@ int orte_rmaps_base_get_job_map(orte_job_map_t **map, orte_jobid_t jobid) proc->pid = *pidptr; continue; } - if(strcmp(keyval->key, ORTE_CELLID_KEY) == 0) { - if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&cellptr, keyval->value, ORTE_CELLID))) { - ORTE_ERROR_LOG(rc); - goto cleanup; - } - cell = *cellptr; - continue; - } if(strcmp(keyval->key, ORTE_NODE_NAME_KEY) == 0) { /* use the 
dss.copy function here to protect us against zero-length strings */ if (ORTE_SUCCESS != (rc = orte_dss.copy((void**)&node_name, keyval->value->data, ORTE_STRING))) { @@ -315,7 +305,7 @@ int orte_rmaps_base_get_job_map(orte_job_map_t **map, orte_jobid_t jobid) } /* store this process in the map */ - if (ORTE_SUCCESS != (rc = orte_rmaps_base_add_proc_to_map(mapping, cell, node_name, launch_id, username, oversub, proc))) { + if (ORTE_SUCCESS != (rc = orte_rmaps_base_add_proc_to_map(mapping, node_name, launch_id, username, oversub, proc))) { ORTE_ERROR_LOG(rc); goto cleanup; } @@ -415,7 +405,7 @@ cleanup: return rc; } -int orte_rmaps_base_get_node_map(orte_mapped_node_t **node, orte_cellid_t cell, +int orte_rmaps_base_get_node_map(orte_mapped_node_t **node, char *nodename, orte_jobid_t job) { orte_job_map_t *map; @@ -437,7 +427,7 @@ int orte_rmaps_base_get_node_map(orte_mapped_node_t **node, orte_cellid_t cell, item = opal_list_get_next(item)) { nptr = (orte_mapped_node_t*)item; - if (cell == nptr->cell && 0 == strcmp(nodename, nptr->nodename)) { + if (0 == strcmp(nodename, nptr->nodename)) { *node = nptr; /* protect the node object from release when we get rid * of the map object @@ -550,9 +540,9 @@ int orte_rmaps_base_put_job_map(orte_job_map_t *map) if (ORTE_SUCCESS != (rc = orte_gpr.create_value(&(values[i]), ORTE_GPR_OVERWRITE|ORTE_GPR_TOKENS_AND, #if OPAL_ENABLE_FT == 1 - segment, 16, + segment, 15, #else - segment, 13, + segment, 12, #endif 0))) { ORTE_ERROR_LOG(rc); @@ -585,42 +575,42 @@ int orte_rmaps_base_put_job_map(orte_job_map_t *map) goto cleanup; } - if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[2]), ORTE_CELLID_KEY, ORTE_CELLID, &(node->cell)))) { + if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[2]), ORTE_NODE_NAME_KEY, ORTE_STRING, node->nodename))) { ORTE_ERROR_LOG(rc); goto cleanup; } - if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[3]), ORTE_NODE_NAME_KEY, ORTE_STRING, node->nodename))) { + 
if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[3]), ORTE_NODE_LAUNCH_ID_KEY, ORTE_INT32, &(node->launch_id)))) { ORTE_ERROR_LOG(rc); goto cleanup; } - if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[4]), ORTE_NODE_LAUNCH_ID_KEY, ORTE_INT32, &(node->launch_id)))) { + if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[4]), ORTE_NODE_USERNAME_KEY, ORTE_STRING, node->username))) { ORTE_ERROR_LOG(rc); goto cleanup; } - if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[5]), ORTE_NODE_USERNAME_KEY, ORTE_STRING, node->username))) { + if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[5]), ORTE_NODE_OVERSUBSCRIBED_KEY, ORTE_BOOL, &(node->oversubscribed)))) { ORTE_ERROR_LOG(rc); goto cleanup; } - if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[6]), ORTE_NODE_OVERSUBSCRIBED_KEY, ORTE_BOOL, &(node->oversubscribed)))) { + if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[6]), ORTE_PROC_APP_CONTEXT_KEY, ORTE_STD_CNTR, &(proc->app_idx)))) { ORTE_ERROR_LOG(rc); goto cleanup; } - if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[7]), ORTE_PROC_APP_CONTEXT_KEY, ORTE_STD_CNTR, &(proc->app_idx)))) { + if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[7]), ORTE_PROC_STATE_KEY, ORTE_PROC_STATE, &proc_state))) { ORTE_ERROR_LOG(rc); goto cleanup; } - if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[8]), ORTE_PROC_STATE_KEY, ORTE_PROC_STATE, &proc_state))) { + if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[8]), ORTE_PROC_LOCAL_RANK_KEY, ORTE_VPID, &(proc->local_rank)))) { ORTE_ERROR_LOG(rc); goto cleanup; } - if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[9]), ORTE_PROC_LOCAL_RANK_KEY, ORTE_VPID, &(proc->local_rank)))) { + if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[9]), ORTE_NODE_NUM_PROCS_KEY, ORTE_STD_CNTR, &(node->num_procs)))) { ORTE_ERROR_LOG(rc); goto cleanup; 
} @@ -630,11 +620,7 @@ int orte_rmaps_base_put_job_map(orte_job_map_t *map) goto cleanup; } - if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[11]), ORTE_NODE_NUM_PROCS_KEY, ORTE_STD_CNTR, &(node->num_procs)))) { - ORTE_ERROR_LOG(rc); - goto cleanup; - } - if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[12]), ORTE_PROC_CPU_LIST_KEY, ORTE_STRING, proc->slot_list))) { + if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[11]), ORTE_PROC_CPU_LIST_KEY, ORTE_STRING, proc->slot_list))) { ORTE_ERROR_LOG(rc); goto cleanup; } @@ -648,21 +634,21 @@ int orte_rmaps_base_put_job_map(orte_job_map_t *map) if( NULL == proc->ckpt_snapshot_loc) proc->ckpt_snapshot_loc = strdup(""); - if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[13]), + if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[12]), ORTE_PROC_CKPT_STATE_KEY, ORTE_SIZE, &(proc->ckpt_state)))) { ORTE_ERROR_LOG(rc); goto cleanup; } - if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[14]), + if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[13]), ORTE_PROC_CKPT_SNAPSHOT_REF_KEY, ORTE_STRING, proc->ckpt_snapshot_ref))) { ORTE_ERROR_LOG(rc); goto cleanup; } - if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[15]), + if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[14]), ORTE_PROC_CKPT_SNAPSHOT_LOC_KEY, ORTE_STRING, proc->ckpt_snapshot_loc))) { ORTE_ERROR_LOG(rc); diff --git a/orte/mca/rmaps/base/rmaps_base_support_fns.c b/orte/mca/rmaps/base/rmaps_base_support_fns.c index 8e5b87c8be..5de4cb5351 100644 --- a/orte/mca/rmaps/base/rmaps_base_support_fns.c +++ b/orte/mca/rmaps/base/rmaps_base_support_fns.c @@ -290,7 +290,7 @@ int orte_rmaps_base_get_mapped_targets(opal_list_t *mapped_node_list, } -int orte_rmaps_base_add_proc_to_map(orte_job_map_t *map, orte_cellid_t cell, char *nodename, int32_t launch_id, +int orte_rmaps_base_add_proc_to_map(orte_job_map_t *map, char *nodename, int32_t 
launch_id, char *username, bool oversubscribed, orte_mapped_proc_t *proc) { opal_list_item_t *item; @@ -301,7 +301,7 @@ int orte_rmaps_base_add_proc_to_map(orte_job_map_t *map, orte_cellid_t cell, cha item = opal_list_get_next(item)) { node = (orte_mapped_node_t*)item; - if (cell == node->cell && 0 == strcmp(nodename, node->nodename)) { + if (0 == strcmp(nodename, node->nodename)) { /* node was found - add this proc to that list */ opal_list_append(&node->procs, &proc->super); /* set the oversubscribed flag */ @@ -317,7 +317,6 @@ int orte_rmaps_base_add_proc_to_map(orte_job_map_t *map, orte_cellid_t cell, cha return ORTE_ERR_OUT_OF_RESOURCE; } - node->cell = cell; node->nodename = strdup(nodename); if (NULL != username) { node->username = strdup(username); @@ -357,8 +356,7 @@ int orte_rmaps_base_claim_slot(orte_job_map_t *map, } /* create the process name as an offset from the vpid-start */ - rc = orte_ns.create_process_name(&name, current_node->node_cellid, - jobid, vpid); + rc = orte_ns.create_process_name(&name, jobid, vpid); if (rc != ORTE_SUCCESS) { ORTE_ERROR_LOG(rc); OBJ_RELEASE(proc); @@ -379,8 +377,7 @@ int orte_rmaps_base_claim_slot(orte_job_map_t *map, } /* add the proc to the map */ - if (ORTE_SUCCESS != (rc = orte_rmaps_base_add_proc_to_map(map, current_node->node_cellid, - current_node->node_name, + if (ORTE_SUCCESS != (rc = orte_rmaps_base_add_proc_to_map(map, current_node->node_name, current_node->launch_id, current_node->node_username, oversub, proc))) { @@ -454,7 +451,7 @@ int orte_rmaps_base_update_node_usage(opal_list_t *nodes) } /* setup index/keys for this node */ - rc = orte_schema.get_node_tokens(&(values[i]->tokens), &(values[i]->num_tokens), node->node_cellid, node->node_name); + rc = orte_schema.get_node_tokens(&(values[i]->tokens), &(values[i]->num_tokens), node->node_name); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); goto cleanup; @@ -739,8 +736,7 @@ int orte_rmaps_base_define_daemons(orte_job_map_t *map) if (node->daemon_preexists) 
continue; /* otherwise, create the daemon's process name and store it on the mapped_node... */ - if (ORTE_SUCCESS != (rc = orte_ns.create_process_name(&node->daemon, ORTE_PROC_MY_NAME->cellid, - 0, vpid))) { + if (ORTE_SUCCESS != (rc = orte_ns.create_process_name(&node->daemon, 0, vpid))) { ORTE_ERROR_LOG(rc); return rc; } diff --git a/orte/mca/rmaps/base/rmaps_class_instances.h b/orte/mca/rmaps/base/rmaps_class_instances.h index 071ab66c72..a3442080aa 100644 --- a/orte/mca/rmaps/base/rmaps_class_instances.h +++ b/orte/mca/rmaps/base/rmaps_class_instances.h @@ -50,7 +50,6 @@ extern "C" { */ static void orte_rmaps_mapped_proc_construct(orte_mapped_proc_t* proc) { - proc->name.cellid = ORTE_CELLID_INVALID; proc->name.jobid = ORTE_JOBID_INVALID; proc->name.vpid = ORTE_VPID_INVALID; proc->rank = ORTE_VPID_INVALID; @@ -68,7 +67,6 @@ static void orte_rmaps_mapped_proc_construct(orte_mapped_proc_t* proc) static void orte_rmaps_mapped_proc_destruct(orte_mapped_proc_t* proc) { - proc->name.cellid = ORTE_CELLID_INVALID; proc->name.jobid = ORTE_JOBID_INVALID; proc->name.vpid = ORTE_VPID_INVALID; proc->rank = ORTE_VPID_INVALID; diff --git a/orte/mca/rmaps/base/rmaps_private.h b/orte/mca/rmaps/base/rmaps_private.h index fb168b456e..806679c1cf 100644 --- a/orte/mca/rmaps/base/rmaps_private.h +++ b/orte/mca/rmaps/base/rmaps_private.h @@ -91,7 +91,7 @@ ORTE_DECLSPEC int orte_rmaps_base_get_job_map(orte_job_map_t **map, orte_jobid_t * Retrieve the information for a job map from the registry and provide the info * for the specified node */ -ORTE_DECLSPEC int orte_rmaps_base_get_node_map(orte_mapped_node_t **node, orte_cellid_t cell, +ORTE_DECLSPEC int orte_rmaps_base_get_node_map(orte_mapped_node_t **node, char *nodename, orte_jobid_t job); @@ -158,7 +158,7 @@ void orte_rmaps_base_recv(int status, orte_process_name_t* sender, * procs. If not, then add new node entry and put this proc * on its list. 
*/ -int orte_rmaps_base_add_proc_to_map(orte_job_map_t *map, orte_cellid_t cell, char *nodename, int32_t launch_id, +int orte_rmaps_base_add_proc_to_map(orte_job_map_t *map, char *nodename, int32_t launch_id, char *username, bool oversubscribed, orte_mapped_proc_t *proc); ORTE_DECLSPEC int orte_rmaps_base_get_target_nodes(opal_list_t* node_list, orte_jobid_t jobid, diff --git a/orte/mca/rmaps/rmaps.h b/orte/mca/rmaps/rmaps.h index e38b34bedb..22869437fb 100644 --- a/orte/mca/rmaps/rmaps.h +++ b/orte/mca/rmaps/rmaps.h @@ -67,7 +67,7 @@ typedef int (*orte_rmaps_base_module_get_job_map_fn_t)(orte_job_map_t **map, ort * Get the map for a job on a specific node from the registry. Providing a jobid of * ORTE_JOBID_WILDCARD will return the map of all processes on that node */ -typedef int (*orte_rmaps_base_module_get_node_map_fn_t)(orte_mapped_node_t **node, orte_cellid_t cell, +typedef int (*orte_rmaps_base_module_get_node_map_fn_t)(orte_mapped_node_t **node, char *nodename, orte_jobid_t job); /** diff --git a/orte/mca/rmaps/rmaps_types.h b/orte/mca/rmaps/rmaps_types.h index 5c45773ce4..22abac3425 100644 --- a/orte/mca/rmaps/rmaps_types.h +++ b/orte/mca/rmaps/rmaps_types.h @@ -72,7 +72,6 @@ OBJ_CLASS_DECLARATION(orte_mapped_proc_t); */ struct orte_mapped_node_t { opal_list_item_t super; - orte_cellid_t cell; /* cell where this node is located */ char *nodename; /* name of node */ int32_t launch_id; /* launch id of node - needed by some systems */ char *username; diff --git a/orte/mca/rmgr/base/rmgr_base_xconnect.c b/orte/mca/rmgr/base/rmgr_base_xconnect.c index 20053f7aa8..1af0689b25 100644 --- a/orte/mca/rmgr/base/rmgr_base_xconnect.c +++ b/orte/mca/rmgr/base/rmgr_base_xconnect.c @@ -50,7 +50,6 @@ int orte_rmgr_base_xconnect(orte_jobid_t child, orte_jobid_t parent) int rc; /* init the name fields */ - name.cellid = ORTE_PROC_MY_NAME->cellid; name.vpid = ORTE_VPID_WILDCARD; /* we want data from everyone in the job */ /* get the child's contact info */ diff --git 
a/orte/mca/rmgr/proxy/rmgr_proxy.c b/orte/mca/rmgr/proxy/rmgr_proxy.c index 17827bf578..db1e5ad48f 100644 --- a/orte/mca/rmgr/proxy/rmgr_proxy.c +++ b/orte/mca/rmgr/proxy/rmgr_proxy.c @@ -330,15 +330,13 @@ static void orte_rmgr_proxy_xconnect_callback(orte_gpr_notify_data_t *data, void static void orte_rmgr_proxy_wireup_stdin(orte_jobid_t jobid) { int rc; - orte_process_name_t* name; + orte_process_name_t name = {ORTE_JOBID_INVALID, 0}; OPAL_TRACE(1); - if (ORTE_SUCCESS != (rc = orte_ns.create_process_name(&name, 0, jobid, 0))) { - ORTE_ERROR_LOG(rc); - return; - } - if (ORTE_SUCCESS != (rc = orte_iof.iof_push(name, ORTE_NS_CMP_JOBID, ORTE_IOF_STDIN, 0))) { + name.jobid = jobid; + + if (ORTE_SUCCESS != (rc = orte_iof.iof_push(&name, ORTE_NS_CMP_JOBID, ORTE_IOF_STDIN, 0))) { ORTE_ERROR_LOG(rc); } } @@ -483,7 +481,7 @@ static int orte_rmgr_proxy_spawn_job( opal_list_t *attributes) { int rc; - orte_process_name_t name = {0, ORTE_JOBID_INVALID, 0}; + orte_process_name_t name = {ORTE_JOBID_INVALID, 0}; orte_attribute_t *attr; uint8_t flags, *fptr; orte_proc_state_t *gate; diff --git a/orte/mca/rmgr/rmgr.h b/orte/mca/rmgr/rmgr.h index d34a152670..71d93a5e1a 100644 --- a/orte/mca/rmgr/rmgr.h +++ b/orte/mca/rmgr/rmgr.h @@ -105,8 +105,8 @@ typedef int (*orte_rmgr_base_module_spawn_job_fn_t)( * Connect a process to other processes, possibly in other jobs. Note that this * function supports WILDCARD process name fields. Thus, a process can request * connection to all other processes in another job by providing a single - * entry in the connect array that has a cellid of ORTE_CELLID_WILDCARD, the - * desired jobid, and a vpid of ORTE_VPID_WILDCARD. + * entry in the connect array that has the + * desired jobid and a vpid of ORTE_VPID_WILDCARD. 
*/ typedef int (*orte_rmgr_base_module_connect_fn_t)(orte_std_cntr_t num_connect, orte_process_name_t *connect); @@ -115,7 +115,7 @@ typedef int (*orte_rmgr_base_module_connect_fn_t)(orte_std_cntr_t num_connect, * Disconnect a process from one or more other processes. Note that this * function supports WILDCARD process name fields. Thus, a process can request * to disconnect from all other processes in another job by providing a single - * entry in the connect array that has a cellid of ORTE_CELLID_WILDCARD, the + * entry in the connect array that has the * desired jobid, and a vpid of ORTE_VPID_WILDCARD. */ typedef int (*orte_rmgr_base_module_disconnect_fn_t)(orte_std_cntr_t num_disconnect, diff --git a/orte/mca/rmgr/urm/rmgr_urm.c b/orte/mca/rmgr/urm/rmgr_urm.c index 8953d6c1b4..12105a2cbd 100644 --- a/orte/mca/rmgr/urm/rmgr_urm.c +++ b/orte/mca/rmgr/urm/rmgr_urm.c @@ -218,18 +218,15 @@ static void orte_rmgr_urm_xconnect_callback(orte_gpr_notify_data_t *data, void * static void orte_rmgr_urm_wireup_stdin(orte_jobid_t jobid) { int rc; - orte_process_name_t* name; + orte_process_name_t name = {ORTE_JOBID_INVALID, 0}; OPAL_TRACE(1); - if (ORTE_SUCCESS != (rc = orte_ns.create_process_name(&name, 0, jobid, 0))) { - ORTE_ERROR_LOG(rc); - return; - } - if (ORTE_SUCCESS != (rc = orte_iof.iof_push(name, ORTE_NS_CMP_JOBID, ORTE_IOF_STDIN, 0))) { + name.jobid = jobid; + + if (ORTE_SUCCESS != (rc = orte_iof.iof_push(&name, ORTE_NS_CMP_JOBID, ORTE_IOF_STDIN, 0))) { ORTE_ERROR_LOG(rc); } - free(name); } @@ -390,7 +387,7 @@ static int orte_rmgr_urm_spawn_job( opal_list_t *attributes) { int rc; - orte_process_name_t* name; + orte_process_name_t name = {ORTE_JOBID_INVALID, 0}; struct timeval urmstart, urmstop; orte_attribute_t *flow, *attr; uint8_t flags, *fptr; @@ -468,19 +465,15 @@ static int orte_rmgr_urm_spawn_job( * setup I/O forwarding */ - if (ORTE_SUCCESS != (rc = orte_ns.create_process_name(&name, 0, *jobid, 0))) { - ORTE_ERROR_LOG(rc); - return rc; - } - if 
(ORTE_SUCCESS != (rc = orte_iof.iof_pull(name, ORTE_NS_CMP_JOBID, ORTE_IOF_STDOUT, 1))) { - ORTE_ERROR_LOG(rc); - return rc; - } - if (ORTE_SUCCESS != (rc = orte_iof.iof_pull(name, ORTE_NS_CMP_JOBID, ORTE_IOF_STDERR, 2))) { - ORTE_ERROR_LOG(rc); - return rc; - } - free(name); /* done with this */ + name.jobid = *jobid; + if (ORTE_SUCCESS != (rc = orte_iof.iof_pull(&name, ORTE_NS_CMP_JOBID, ORTE_IOF_STDOUT, 1))) { + ORTE_ERROR_LOG(rc); + return rc; + } + if (ORTE_SUCCESS != (rc = orte_iof.iof_pull(&name, ORTE_NS_CMP_JOBID, ORTE_IOF_STDERR, 2))) { + ORTE_ERROR_LOG(rc); + return rc; + } #if 0 { diff --git a/orte/mca/rml/base/rml_base_contact.c b/orte/mca/rml/base/rml_base_contact.c index 45b39f96b8..d2f4681e72 100644 --- a/orte/mca/rml/base/rml_base_contact.c +++ b/orte/mca/rml/base/rml_base_contact.c @@ -197,9 +197,9 @@ orte_rml_base_register_contact_info(void) tmp2 = strrchr(tmp, '/') + 1; tmp3 = strrchr(tmp, ':'); if(NULL == tmp2 || NULL == tmp3) { - opal_output(0, "[%lu,%lu,%lu] orte_rml_base_init: invalid address \'%s\' " + opal_output(0, "%s orte_rml_base_init: invalid address \'%s\' " "returned for selected oob interfaces.\n", - ORTE_NAME_ARGS(orte_process_info.my_name), tmp); + ORTE_NAME_PRINT(orte_process_info.my_name), tmp); ORTE_ERROR_LOG(ORTE_ERROR); free(tmp); return ORTE_ERROR; diff --git a/orte/mca/routed/unity/routed_unity_component.c b/orte/mca/routed/unity/routed_unity_component.c index c5f02973aa..f311aad5e1 100644 --- a/orte/mca/routed/unity/routed_unity_component.c +++ b/orte/mca/routed/unity/routed_unity_component.c @@ -79,9 +79,9 @@ orte_routed_unity_update_route(orte_process_name_t *target, orte_process_name_t *route) { OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output, - "routed_unity_update: [%ld,%ld,%ld] --> [%ld,%ld,%ld]", - ORTE_NAME_ARGS(target), - ORTE_NAME_ARGS(route))); + "routed_unity_update: %s --> %s", + ORTE_NAME_PRINT(target), + ORTE_NAME_PRINT(route))); return ORTE_SUCCESS; } @@ -90,8 +90,8 @@ orte_process_name_t 
orte_routed_unity_get_route(orte_process_name_t *target) { OPAL_OUTPUT_VERBOSE((1, orte_routed_base_output, - "routed_unity_get([%ld,%ld,%ld]) --> [%ld,%ld,%ld]", - ORTE_NAME_ARGS(target), - ORTE_NAME_ARGS(target))); + "routed_unity_get(%s) --> %s", + ORTE_NAME_PRINT(target), + ORTE_NAME_PRINT(target))); return *target; } diff --git a/orte/mca/schema/base/base.h b/orte/mca/schema/base/base.h index ec9e17f016..7e70229f2d 100644 --- a/orte/mca/schema/base/base.h +++ b/orte/mca/schema/base/base.h @@ -54,9 +54,8 @@ extern "C" { /* general usage functions */ int orte_schema_base_get_proc_tokens(char ***tokens, orte_std_cntr_t* num_tokens, orte_process_name_t *proc); -int orte_schema_base_get_node_tokens(char ***tokens, orte_std_cntr_t* num_tokens, orte_cellid_t cellid, char *nodename); +int orte_schema_base_get_node_tokens(char ***tokens, orte_std_cntr_t* num_tokens, char *nodename); int orte_schema_base_get_job_tokens(char ***tokens, orte_std_cntr_t* num_tokens, orte_jobid_t jobid); -int orte_schema_base_get_cell_tokens(char ***tokens, orte_std_cntr_t* num_tokens, orte_cellid_t cellid); int orte_schema_base_get_job_segment_name(char **name, orte_jobid_t jobid); int orte_schema_base_extract_jobid_from_segment_name(orte_jobid_t *jobid, char *name); int orte_schema_base_store_my_info(void); diff --git a/orte/mca/schema/base/schema_base_fns.c b/orte/mca/schema/base/schema_base_fns.c index 19fb01b476..9b608b2ee8 100644 --- a/orte/mca/schema/base/schema_base_fns.c +++ b/orte/mca/schema/base/schema_base_fns.c @@ -82,40 +82,23 @@ CLEANUP: return rc; } -int orte_schema_base_get_node_tokens(char ***node_tokens, orte_std_cntr_t* num_tokens, orte_cellid_t cellid, char *nodename) +int orte_schema_base_get_node_tokens(char ***node_tokens, orte_std_cntr_t* num_tokens, char *nodename) { - int rc; char** tokens; - char* cellid_string; - tokens = (char**)malloc(3 * sizeof(char*)); + tokens = (char**)malloc(2 * sizeof(char*)); if (NULL == tokens) { 
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); return ORTE_ERR_OUT_OF_RESOURCE; } - if (ORTE_SUCCESS != (rc = orte_ns.convert_cellid_to_string(&cellid_string, cellid))) { - ORTE_ERROR_LOG(rc); - goto CLEANUP; - } - asprintf(&tokens[0], "%s-%s", ORTE_CELLID_KEY, cellid_string); - free(cellid_string); - tokens[1] = strdup(nodename); - tokens[2] = NULL; + tokens[0] = strdup(nodename); + tokens[1] = NULL; *node_tokens = tokens; if(num_tokens != NULL) - *num_tokens = 2; + *num_tokens = 1; + return ORTE_SUCCESS; - -CLEANUP: - if (NULL != tokens) { - if (NULL != tokens[0]) - free(tokens[0]); - if (NULL != tokens[1]) - free(tokens[1]); - free(tokens); - } - return rc; } int orte_schema_base_get_job_tokens(char ***job_tokens, orte_std_cntr_t* num_tokens, orte_jobid_t jobid) @@ -150,49 +133,6 @@ CLEANUP: return rc; } -int orte_schema_base_get_cell_tokens(char ***cell_tokens, orte_std_cntr_t* num_tokens, orte_cellid_t cellid) -{ - int rc; - char **tokens; - char *site, *resource, *cellstr; - orte_std_cntr_t n, i; - - n = 1; - - if (ORTE_SUCCESS != (rc = orte_ns.get_cell_info(cellid, &site, &resource))) { - ORTE_ERROR_LOG(rc); - return rc; - } - if (ORTE_SUCCESS != (rc = orte_ns.convert_cellid_to_string(&cellstr, cellid))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - if (NULL != site) n++; - if (NULL != resource) n++; - - tokens = (char**)malloc(n * sizeof(char*)); - if (NULL == tokens) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - tokens[0] = cellstr; - i=1; - if (NULL != site) { - tokens[i] = site; - i++; - } - if (NULL != resource) { - tokens[i] = resource; - } - - *num_tokens = n; - *cell_tokens = tokens; - - return ORTE_SUCCESS; -} - int orte_schema_base_get_job_segment_name(char **name, orte_jobid_t jobid) { char *jobidstring; @@ -228,7 +168,7 @@ int orte_schema_base_extract_jobid_from_segment_name(orte_jobid_t *jobid, char * jobstring++; if (ORTE_SUCCESS != (rc = orte_ns.convert_string_to_jobid(&job, jobstring))) { 
ORTE_ERROR_LOG(rc); - opal_output(0, "[%lu,%lu,%lu] %s\n", ORTE_NAME_ARGS(orte_process_info.my_name), jobstring); + opal_output(0, "%s %s\n", ORTE_NAME_PRINT(orte_process_info.my_name), jobstring); return rc; } *jobid = job; diff --git a/orte/mca/schema/base/schema_base_open.c b/orte/mca/schema/base/schema_base_open.c index 38a5a25481..127b74e0f7 100644 --- a/orte/mca/schema/base/schema_base_open.c +++ b/orte/mca/schema/base/schema_base_open.c @@ -42,7 +42,6 @@ orte_schema_base_module_t orte_schema = { orte_schema_base_get_proc_tokens, orte_schema_base_get_node_tokens, orte_schema_base_get_job_tokens, - orte_schema_base_get_cell_tokens, orte_schema_base_get_job_segment_name, orte_schema_base_extract_jobid_from_segment_name, orte_schema_base_get_std_trigger_name, diff --git a/orte/mca/schema/schema.h b/orte/mca/schema/schema.h index 0e95506bf5..1955b0fedb 100644 --- a/orte/mca/schema/schema.h +++ b/orte/mca/schema/schema.h @@ -46,14 +46,8 @@ typedef int (*orte_schema_get_job_tokens_fn_t)( typedef int (*orte_schema_get_node_tokens_fn_t)( char ***tokens, orte_std_cntr_t* num_tokens, - orte_cellid_t cellid, char *nodename); -typedef int (*orte_schema_get_cell_tokens_fn_t)( - char ***tokens, - orte_std_cntr_t* num_tokens, - orte_cellid_t cellid); - typedef int (*orte_schema_get_job_segment_name_fn_t)(char **name, orte_jobid_t jobid); typedef int (*orte_schema_extract_jobid_from_segment_name_fn_t)(orte_jobid_t *jobid, char *name); @@ -78,7 +72,6 @@ struct orte_schema_base_module_1_0_0_t { orte_schema_get_proc_tokens_fn_t get_proc_tokens; orte_schema_get_node_tokens_fn_t get_node_tokens; orte_schema_get_job_tokens_fn_t get_job_tokens; - orte_schema_get_cell_tokens_fn_t get_cell_tokens; orte_schema_get_job_segment_name_fn_t get_job_segment_name; orte_schema_extract_jobid_from_segment_name_fn_t extract_jobid_from_segment_name; orte_schema_get_std_trigger_name_fn_t get_std_trigger_name; diff --git a/orte/mca/schema/schema_types.h b/orte/mca/schema/schema_types.h index 
33f6e143a8..226ddf6dcd 100644 --- a/orte/mca/schema/schema_types.h +++ b/orte/mca/schema/schema_types.h @@ -59,7 +59,6 @@ * ORTE-wide key names for storing/retrieving data from the registry. * Subsystem-specific keys will be defined in each=/ subsystem's xxx_types.h file. */ -#define ORTE_CELLID_KEY "orte-cellid" #define ORTE_JOBGRP_KEY "orte-jobgrp" #define ORTE_JOBID_KEY "orte-jobid" #define ORTE_VPID_KEY "orte-vpid" diff --git a/orte/mca/sds/base/base.h b/orte/mca/sds/base/base.h index 3fc6d28763..baccd00d8c 100644 --- a/orte/mca/sds/base/base.h +++ b/orte/mca/sds/base/base.h @@ -83,16 +83,14 @@ extern "C" { orte_vpid_t local_rank, orte_std_cntr_t num_local_procs, int fd); - ORTE_DECLSPEC int orte_ns_nds_bproc_put(orte_cellid_t cell, - orte_jobid_t job, + ORTE_DECLSPEC int orte_ns_nds_bproc_put(orte_jobid_t job, orte_vpid_t vpid_start, orte_vpid_t global_vpid_start, orte_std_cntr_t num_procs, orte_vpid_t local_rank, orte_std_cntr_t num_local_procs, char ***env); - ORTE_DECLSPEC int orte_ns_nds_xcpu_put(orte_cellid_t cell, - orte_jobid_t job, + ORTE_DECLSPEC int orte_ns_nds_xcpu_put(orte_jobid_t job, orte_vpid_t vpid_start, orte_std_cntr_t num_procs, orte_vpid_t local_rank, diff --git a/orte/mca/sds/base/sds_base_put.c b/orte/mca/sds/base/sds_base_put.c index 4fdd29eca1..587810ebe7 100644 --- a/orte/mca/sds/base/sds_base_put.c +++ b/orte/mca/sds/base/sds_base_put.c @@ -40,16 +40,11 @@ int orte_ns_nds_env_put(const orte_process_name_t* name, char ***env) { char* param; - char* cellid; char* jobid; char* vpid; char* value; int rc; - if(ORTE_SUCCESS != (rc = orte_ns.get_cellid_string(&cellid, name))) { - ORTE_ERROR_LOG(rc); - return rc; - } if(ORTE_SUCCESS != (rc = orte_ns.get_jobid_string(&jobid, name))) { ORTE_ERROR_LOG(rc); return rc; @@ -86,14 +81,6 @@ int orte_ns_nds_env_put(const orte_process_name_t* name, free(param); /* setup the name */ - if(NULL == (param = mca_base_param_environ_variable("ns","nds","cellid"))) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); 
- return ORTE_ERR_OUT_OF_RESOURCE; - } - opal_setenv(param, cellid, true, env); - free(param); - free(cellid); - if(NULL == (param = mca_base_param_environ_variable("ns","nds","jobid"))) { ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); return ORTE_ERR_OUT_OF_RESOURCE; @@ -161,7 +148,7 @@ int orte_ns_nds_env_put(const orte_process_name_t* name, * @retval ORTE_SUCCESS * @retval error */ -int orte_ns_nds_bproc_put(orte_cellid_t cell, orte_jobid_t job, +int orte_ns_nds_bproc_put(orte_jobid_t job, orte_vpid_t vpid_start, orte_vpid_t global_vpid_start, orte_std_cntr_t num_procs, orte_vpid_t local_rank, @@ -199,18 +186,6 @@ int orte_ns_nds_bproc_put(orte_cellid_t cell, orte_jobid_t job, free(param); /* setup the name */ - if(ORTE_SUCCESS != (rc = orte_ns.convert_cellid_to_string(&value, cell))) { - ORTE_ERROR_LOG(rc); - return rc; - } - if(NULL == (param = mca_base_param_environ_variable("ns","nds","cellid"))) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - opal_setenv(param, value, true, env); - free(param); - free(value); - if(ORTE_SUCCESS != (rc = orte_ns.convert_jobid_to_string(&value, job))) { ORTE_ERROR_LOG(rc); return rc; @@ -298,7 +273,7 @@ int orte_ns_nds_bproc_put(orte_cellid_t cell, orte_jobid_t job, * @retval ORTE_SUCCESS * @retval error */ -int orte_ns_nds_xcpu_put(orte_cellid_t cell, orte_jobid_t job, +int orte_ns_nds_xcpu_put(orte_jobid_t job, orte_vpid_t vpid_start, orte_std_cntr_t num_procs, orte_vpid_t local_rank, orte_std_cntr_t num_local_procs, @@ -327,19 +302,7 @@ int orte_ns_nds_xcpu_put(orte_cellid_t cell, orte_jobid_t job, free(param); /* setup the name */ - if(ORTE_SUCCESS != (rc = orte_ns.convert_cellid_to_string(&value, cell))) { - ORTE_ERROR_LOG(rc); - return rc; - } - if(NULL == (param = mca_base_param_environ_variable("ns","nds","cellid"))) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - opal_setenv(param, value, true, env); - free(param); - free(value); - - 
if(ORTE_SUCCESS != (rc = orte_ns.convert_jobid_to_string(&value, job))) { + if(ORTE_SUCCESS != (rc = orte_ns.convert_jobid_to_string(&value, job))) { ORTE_ERROR_LOG(rc); return rc; } diff --git a/orte/mca/sds/bproc/sds_bproc_module.c b/orte/mca/sds/bproc/sds_bproc_module.c index 64f1c10b51..e92a45e677 100644 --- a/orte/mca/sds/bproc/sds_bproc_module.c +++ b/orte/mca/sds/bproc/sds_bproc_module.c @@ -87,27 +87,14 @@ int orte_sds_bproc_set_name(void) } else { - orte_cellid_t cellid; orte_jobid_t jobid; orte_vpid_t vpid; orte_vpid_t vpid_start; - char* cellid_string; int num_procs; char *bproc_rank_string; int bproc_rank; int stride; - id = mca_base_param_register_string("ns", "nds", "cellid", NULL, NULL); - mca_base_param_lookup_string(id, &cellid_string); - if (NULL == cellid_string) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - return ORTE_ERR_NOT_FOUND; - } - if (ORTE_SUCCESS != (rc = orte_ns.convert_string_to_cellid(&cellid, cellid_string))) { - ORTE_ERROR_LOG(rc); - return(rc); - } - id = mca_base_param_register_string("ns", "nds", "jobid", NULL, NULL); mca_base_param_lookup_string(id, &jobid_string); if (NULL == jobid_string) { @@ -159,7 +146,6 @@ int orte_sds_bproc_set_name(void) /* create our name */ if (ORTE_SUCCESS != (rc = orte_ns.create_process_name( &(orte_process_info.my_name), - cellid, jobid, vpid))) { ORTE_ERROR_LOG(rc); diff --git a/orte/mca/sds/cnos/sds_cnos_module.c b/orte/mca/sds/cnos/sds_cnos_module.c index 9dc5bc1af8..33eba735fd 100644 --- a/orte/mca/sds/cnos/sds_cnos_module.c +++ b/orte/mca/sds/cnos/sds_cnos_module.c @@ -66,20 +66,17 @@ int orte_sds_cnos_set_name(void) { int rc; - orte_cellid_t cellid; orte_jobid_t jobid; orte_vpid_t vpid; /* Get our process information * - * we're going to make up the cellid and jobid. find our vpid, + * we're going to make up the jobid. 
find our vpid, * assuming range starts at 0 */ - cellid = 0; jobid = 1; /* not 0, since it has special meaning */ vpid = (orte_vpid_t) cnos_get_rank(); if (ORTE_SUCCESS != (rc = orte_ns.create_process_name(&(orte_process_info.my_name), - cellid, jobid, vpid))) { ORTE_ERROR_LOG(rc); diff --git a/orte/mca/sds/env/sds_env_module.c b/orte/mca/sds/env/sds_env_module.c index 3b2e327c79..51ec9797f7 100644 --- a/orte/mca/sds/env/sds_env_module.c +++ b/orte/mca/sds/env/sds_env_module.c @@ -63,24 +63,11 @@ orte_sds_env_set_name(void) } else { - orte_cellid_t cellid; orte_jobid_t jobid; orte_vpid_t vpid; - char* cellid_string; char* jobid_string; char* vpid_string; - id = mca_base_param_register_string("ns", "nds", "cellid", NULL, NULL); - mca_base_param_lookup_string(id, &cellid_string); - if (NULL == cellid_string) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - return ORTE_ERR_NOT_FOUND; - } - if (ORTE_SUCCESS != (rc = orte_ns.convert_string_to_cellid(&cellid, cellid_string))) { - ORTE_ERROR_LOG(rc); - return(rc); - } - id = mca_base_param_register_string("ns", "nds", "jobid", NULL, NULL); mca_base_param_lookup_string(id, &jobid_string); if (NULL == jobid_string) { @@ -105,7 +92,6 @@ orte_sds_env_set_name(void) if (ORTE_SUCCESS != (rc = orte_ns.create_process_name( &(orte_process_info.my_name), - cellid, jobid, vpid))) { ORTE_ERROR_LOG(rc); diff --git a/orte/mca/sds/portals_utcp/sds_portals_utcp_module.c b/orte/mca/sds/portals_utcp/sds_portals_utcp_module.c index c0454207f6..1fe255618e 100644 --- a/orte/mca/sds/portals_utcp/sds_portals_utcp_module.c +++ b/orte/mca/sds/portals_utcp/sds_portals_utcp_module.c @@ -66,7 +66,6 @@ int orte_sds_portals_utcp_set_name(void) { int rc, i, len, num_procs; - orte_cellid_t cellid; orte_jobid_t jobid; orte_vpid_t vpid; char* vpid_string; @@ -81,10 +80,9 @@ orte_sds_portals_utcp_set_name(void) /* Get our process information * - * we're going to make up the cellid and jobid. find our vpid, + * we're going to make up the jobid. 
find our vpid, * assuming range starts at 0 */ - cellid = 0; jobid = 1; /* not 0, since it has special meaning */ if (ORTE_SUCCESS != (rc = orte_ns.convert_string_to_vpid(&vpid, vpid_string))) { ORTE_ERROR_LOG(rc); @@ -92,7 +90,6 @@ orte_sds_portals_utcp_set_name(void) } if (ORTE_SUCCESS != (rc = orte_ns.create_process_name( &(orte_process_info.my_name), - cellid, jobid, vpid))) { ORTE_ERROR_LOG(rc); diff --git a/orte/mca/sds/slurm/sds_slurm_module.c b/orte/mca/sds/slurm/sds_slurm_module.c index 3a292ff449..06470f396f 100644 --- a/orte/mca/sds/slurm/sds_slurm_module.c +++ b/orte/mca/sds/slurm/sds_slurm_module.c @@ -57,7 +57,7 @@ orte_sds_slurm_set_name(void) char* name_string = NULL; int slurm_nodeid; - /* start by getting our cellid, jobid, and vpid (which is the + /* start by getting our jobid, and vpid (which is the starting vpid for the list of daemons) */ id = mca_base_param_register_string("ns", "nds", "name", NULL, NULL); mca_base_param_lookup_string(id, &name_string); @@ -73,24 +73,11 @@ orte_sds_slurm_set_name(void) free(name_string); } else { - orte_cellid_t cellid; orte_jobid_t jobid; orte_vpid_t vpid; - char* cellid_string; char* jobid_string; char* vpid_string; - id = mca_base_param_register_string("ns", "nds", "cellid", NULL, NULL); - mca_base_param_lookup_string(id, &cellid_string); - if (NULL == cellid_string) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - return ORTE_ERR_NOT_FOUND; - } - if (ORTE_SUCCESS != (rc = orte_ns.convert_string_to_cellid(&cellid, cellid_string))) { - ORTE_ERROR_LOG(rc); - return(rc); - } - id = mca_base_param_register_string("ns", "nds", "jobid", NULL, NULL); mca_base_param_lookup_string(id, &jobid_string); if (NULL == jobid_string) { @@ -114,7 +101,6 @@ orte_sds_slurm_set_name(void) } if (ORTE_SUCCESS != (rc = orte_ns.create_process_name(&(orte_process_info.my_name), - cellid, jobid, vpid))) { ORTE_ERROR_LOG(rc); diff --git a/orte/mca/sds/xcpu/sds_xcpu_module.c b/orte/mca/sds/xcpu/sds_xcpu_module.c index b167b8f6e3..b8b7aae131 
100644 --- a/orte/mca/sds/xcpu/sds_xcpu_module.c +++ b/orte/mca/sds/xcpu/sds_xcpu_module.c @@ -63,27 +63,14 @@ int orte_sds_xcpu_set_name(void) } else { - orte_cellid_t cellid; orte_jobid_t jobid; orte_vpid_t vpid_start; - char* cellid_string; char* jobid_string; char* vpid_string; char *xcpu_rank_string; int xcpu_rank; int stride; - id = mca_base_param_register_string("ns", "nds", "cellid", NULL, NULL); - mca_base_param_lookup_string(id, &cellid_string); - if (NULL == cellid_string) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - return ORTE_ERR_NOT_FOUND; - } - if (ORTE_SUCCESS != (rc = orte_ns.convert_string_to_cellid(&cellid, cellid_string))) { - ORTE_ERROR_LOG(rc); - return(rc); - } - id = mca_base_param_register_string("ns", "nds", "jobid", NULL, NULL); mca_base_param_lookup_string(id, &jobid_string); if (NULL == jobid_string) { @@ -113,6 +100,11 @@ int orte_sds_xcpu_set_name(void) return rc; } + /* RHC: Hmmm...I don't see where the process name actually + * gets created here....does it always come in as a complete name? + * If so, then why have the above computation? 
+ */ + #if 0 id = mca_base_param_register_string("ns", "nds", "global_vpid_start", NULL, NULL); mca_base_param_lookup_string(id, &vpid_string); diff --git a/orte/mca/smr/base/smr_base_get_node_state.c b/orte/mca/smr/base/smr_base_get_node_state.c index d8bcb182b6..99b6daa0fd 100644 --- a/orte/mca/smr/base/smr_base_get_node_state.c +++ b/orte/mca/smr/base/smr_base_get_node_state.c @@ -34,7 +34,6 @@ #include "orte/mca/smr/base/smr_private.h" int orte_smr_base_get_node_state(orte_node_state_t *state, - orte_cellid_t cell, char *nodename) { orte_gpr_value_t **values=NULL; @@ -47,7 +46,7 @@ int orte_smr_base_get_node_state(orte_node_state_t *state, }; orte_node_state_t *sptr; - if (ORTE_SUCCESS != (rc = orte_schema.get_node_tokens(&tokens, &num_tokens, cell, nodename))) { + if (ORTE_SUCCESS != (rc = orte_schema.get_node_tokens(&tokens, &num_tokens, nodename))) { ORTE_ERROR_LOG(rc); return rc; } diff --git a/orte/mca/smr/base/smr_base_open.c b/orte/mca/smr/base/smr_base_open.c index 0703a2790f..8b64fbfccb 100644 --- a/orte/mca/smr/base/smr_base_open.c +++ b/orte/mca/smr/base/smr_base_open.c @@ -73,7 +73,6 @@ orte_smr_base_module_t orte_smr = { */ static void orte_smr_node_tracker_construct(orte_smr_node_state_tracker_t* node) { - node->cell = ORTE_CELLID_INVALID; node->nodename = NULL; node->state = ORTE_NODE_STATE_UNKNOWN; } diff --git a/orte/mca/smr/base/smr_base_set_node_state.c b/orte/mca/smr/base/smr_base_set_node_state.c index fcc81cef82..ecba353ce4 100644 --- a/orte/mca/smr/base/smr_base_set_node_state.c +++ b/orte/mca/smr/base/smr_base_set_node_state.c @@ -30,8 +30,7 @@ #include "orte/mca/smr/base/smr_private.h" -int orte_smr_base_set_node_state(orte_cellid_t cell, - char *nodename, +int orte_smr_base_set_node_state(char *nodename, orte_node_state_t state) { orte_gpr_value_t *value; @@ -43,7 +42,7 @@ int orte_smr_base_set_node_state(orte_cellid_t cell, return rc; } - if (ORTE_SUCCESS != (rc = orte_schema.get_node_tokens(&(value->tokens), &(value->num_tokens), 
cell, nodename))) { + if (ORTE_SUCCESS != (rc = orte_schema.get_node_tokens(&(value->tokens), &(value->num_tokens), nodename))) { ORTE_ERROR_LOG(rc); OBJ_RELEASE(value); return rc; diff --git a/orte/mca/smr/base/smr_private.h b/orte/mca/smr/base/smr_private.h index 9cc8771fba..54efb76b14 100644 --- a/orte/mca/smr/base/smr_private.h +++ b/orte/mca/smr/base/smr_private.h @@ -48,7 +48,6 @@ extern "C" { */ typedef struct { opal_list_item_t super; - orte_cellid_t cell; char *nodename; orte_node_state_t state; } orte_smr_node_state_tracker_t; @@ -63,11 +62,9 @@ int orte_smr_base_set_proc_state(orte_process_name_t *proc, int status); int orte_smr_base_get_node_state(orte_node_state_t *state, - orte_cellid_t cell, char *nodename); -int orte_smr_base_set_node_state(orte_cellid_t cell, - char *nodename, +int orte_smr_base_set_node_state(char *nodename, orte_node_state_t state); int orte_smr_base_get_job_state(orte_job_state_t *state, diff --git a/orte/mca/smr/bproc/smr_bproc.c b/orte/mca/smr/bproc/smr_bproc.c index 4d2ef453e2..91a72ad795 100644 --- a/orte/mca/smr/bproc/smr_bproc.c +++ b/orte/mca/smr/bproc/smr_bproc.c @@ -246,8 +246,7 @@ static void update_registry(bit_set changes, struct bproc_node_info_t *ni) return; } - ret = orte_schema.get_node_tokens(&(value->tokens), &(value->num_tokens), - ORTE_PROC_MY_NAME->cellid, node_name); + ret = orte_schema.get_node_tokens(&(value->tokens), &(value->num_tokens), node_name); if (ret != ORTE_SUCCESS) { ORTE_ERROR_LOG(ret); diff --git a/orte/mca/smr/smr.h b/orte/mca/smr/smr.h index 11b9f69ed2..a79257a507 100644 --- a/orte/mca/smr/smr.h +++ b/orte/mca/smr/smr.h @@ -65,13 +65,11 @@ typedef int (*orte_smr_base_module_set_proc_state_fn_t)(orte_process_name_t *pro * Query a node state */ typedef int (*orte_smr_base_module_get_node_state_fn_t)(orte_node_state_t *state, - orte_cellid_t cell, char *nodename); /* * Set a node state */ -typedef int (*orte_smr_base_module_set_node_state_fn_t)(orte_cellid_t cell, - char *nodename, +typedef 
int (*orte_smr_base_module_set_node_state_fn_t)(char *nodename, orte_node_state_t state); /* diff --git a/orte/mca/snapc/base/snapc_base_fns.c b/orte/mca/snapc/base/snapc_base_fns.c index 874cc1ea41..826cec90b9 100644 --- a/orte/mca/snapc/base/snapc_base_fns.c +++ b/orte/mca/snapc/base/snapc_base_fns.c @@ -71,7 +71,6 @@ OBJ_CLASS_INSTANCE(orte_snapc_base_snapshot_t, void orte_snapc_base_snapshot_construct(orte_snapc_base_snapshot_t *snapshot) { - snapshot->process_name.cellid = 0; snapshot->process_name.jobid = 0; snapshot->process_name.vpid = 0; snapshot->process_pid = 0; @@ -81,7 +80,6 @@ void orte_snapc_base_snapshot_construct(orte_snapc_base_snapshot_t *snapshot) void orte_snapc_base_snapshot_destruct( orte_snapc_base_snapshot_t *snapshot) { - snapshot->process_name.cellid = 0; snapshot->process_name.jobid = 0; snapshot->process_name.vpid = 0; snapshot->process_pid = 0; @@ -1505,7 +1503,6 @@ int orte_snapc_base_extract_metadata(orte_snapc_base_global_snapshot_t *global_s vpid_snapshot = OBJ_NEW(orte_snapc_base_snapshot_t); - vpid_snapshot->process_name.cellid = proc->cellid; vpid_snapshot->process_name.jobid = proc->jobid; vpid_snapshot->process_name.vpid = proc->vpid; } diff --git a/orte/mca/snapc/full/snapc_full_app.c b/orte/mca/snapc/full/snapc_full_app.c index 709d9d3b7b..63d1e0b004 100644 --- a/orte/mca/snapc/full/snapc_full_app.c +++ b/orte/mca/snapc/full/snapc_full_app.c @@ -58,8 +58,7 @@ int app_coord_init() { */ opal_output_verbose(20, mca_snapc_full_component.super.output_handle, - "app) Initalized for Application (%d.%d.%d)\n", - orte_process_info.my_name->cellid, + "app) Initalized for Application (%d.%d)\n", orte_process_info.my_name->jobid, orte_process_info.my_name->vpid); diff --git a/orte/mca/snapc/full/snapc_full_global.c b/orte/mca/snapc/full/snapc_full_global.c index 4fb7c5349f..808260f868 100644 --- a/orte/mca/snapc/full/snapc_full_global.c +++ b/orte/mca/snapc/full/snapc_full_global.c @@ -146,7 +146,6 @@ int 
global_coord_setup_job(orte_jobid_t jobid) { vpid_snapshot = OBJ_NEW(orte_snapc_base_snapshot_t); - vpid_snapshot->process_name.cellid = 0; vpid_snapshot->process_name.jobid = jobid; vpid_snapshot->process_name.vpid = i; vpid_snapshot->term = false; @@ -399,8 +398,7 @@ static void vpid_ckpt_state_callback(orte_gpr_notify_data_t *data, void *cbdata) } opal_output_verbose(20, mca_snapc_full_component.super.output_handle, - "global) Process (%d,%d,%d): Changed to state to:\n", - proc->cellid, + "global) Process (%d,%d): Changed to state to:\n", proc->jobid, proc->vpid); opal_output_verbose(20, mca_snapc_full_component.super.output_handle, @@ -419,8 +417,7 @@ static void vpid_ckpt_state_callback(orte_gpr_notify_data_t *data, void *cbdata) orte_snapc_base_snapshot_t *vpid_snapshot; vpid_snapshot = (orte_snapc_base_snapshot_t*)item; - if(vpid_snapshot->process_name.cellid == proc->cellid && - vpid_snapshot->process_name.jobid == proc->jobid && + if(vpid_snapshot->process_name.jobid == proc->jobid && vpid_snapshot->process_name.vpid == proc->vpid) { vpid_snapshot->state = ckpt_state; @@ -565,7 +562,6 @@ static int snapc_full_reg_vpid_state_updates( orte_jobid_t jobid, orte_vpid_t *v } for ( vpid = *vpid_start; vpid < *vpid_start + *vpid_range; ++vpid) { - proc.cellid = 0; proc.jobid = jobid; proc.vpid = vpid; @@ -901,7 +897,6 @@ static int snapc_full_global_gather_all_files(void) { /* * Construct the process information */ - filem_request->proc_name[0].cellid = vpid_snapshot->process_name.cellid; filem_request->proc_name[0].jobid = vpid_snapshot->process_name.jobid; filem_request->proc_name[0].vpid = vpid_snapshot->process_name.vpid; diff --git a/orte/mca/snapc/full/snapc_full_local.c b/orte/mca/snapc/full/snapc_full_local.c index 52a66b9696..ca8a7e91f8 100644 --- a/orte/mca/snapc/full/snapc_full_local.c +++ b/orte/mca/snapc/full/snapc_full_local.c @@ -681,7 +681,6 @@ static int snapc_full_local_get_vpids(void) /* The pid is not known at this time, we will update it later 
*/ vpid_snapshot->process_pid = 0; - vpid_snapshot->process_name.cellid = proc_name->cellid; vpid_snapshot->process_name.jobid = proc_name->jobid; vpid_snapshot->process_name.vpid = proc_name->vpid; diff --git a/orte/orted/orted_main.c b/orte/orted/orted_main.c index 3fdd81ce95..2381c8d310 100644 --- a/orte/orted/orted_main.c +++ b/orte/orted/orted_main.c @@ -490,8 +490,8 @@ int orte_daemon(int argc, char *argv[]) * for debugging purposes */ if (orte_debug_daemons_flag) { - fprintf(stderr, "Daemon [%ld,%ld,%ld] checking in as pid %ld on host %s\n", - ORTE_NAME_ARGS(orte_process_info.my_name), (long)orte_process_info.pid, + fprintf(stderr, "Daemon %s checking in as pid %ld on host %s\n", + ORTE_NAME_PRINT(orte_process_info.my_name), (long)orte_process_info.pid, orte_system_info.nodename); } @@ -551,7 +551,7 @@ int orte_daemon(int argc, char *argv[]) } if (orte_debug_daemons_flag) { - opal_output(0, "[%lu,%lu,%lu] orted: up and running - waiting for commands!", ORTE_NAME_ARGS(orte_process_info.my_name)); + opal_output(0, "%s orted: up and running - waiting for commands!", ORTE_NAME_PRINT(orte_process_info.my_name)); } while (false == orted_globals.exit_condition) { @@ -561,7 +561,7 @@ int orte_daemon(int argc, char *argv[]) OPAL_THREAD_UNLOCK(&orted_globals.mutex); if (orte_debug_daemons_flag) { - opal_output(0, "[%lu,%lu,%lu] orted: mutex cleared - finalizing", ORTE_NAME_ARGS(orte_process_info.my_name)); + opal_output(0, "%s orted: mutex cleared - finalizing", ORTE_NAME_PRINT(orte_process_info.my_name)); } /* cleanup */ @@ -608,9 +608,9 @@ void orte_daemon_recv_routed(int status, orte_process_name_t* sender, OPAL_THREAD_LOCK(&orted_globals.mutex); if (orte_debug_daemons_flag) { - opal_output(0, "[%lu,%lu,%lu] orted_recv_routed: received message from [%ld,%ld,%ld]", - ORTE_NAME_ARGS(orte_process_info.my_name), - ORTE_NAME_ARGS(sender)); + opal_output(0, "%s orted_recv_routed: received message from %s", + ORTE_NAME_PRINT(orte_process_info.my_name), + 
ORTE_NAME_PRINT(sender)); } /* unpack the routing algorithm */ @@ -655,9 +655,9 @@ void orte_daemon_recv(int status, orte_process_name_t* sender, OPAL_THREAD_LOCK(&orted_globals.mutex); if (orte_debug_daemons_flag) { - opal_output(0, "[%lu,%lu,%lu] orted_recv_cmd: received message from [%ld,%ld,%ld]", - ORTE_NAME_ARGS(orte_process_info.my_name), - ORTE_NAME_ARGS(sender)); + opal_output(0, "%s orted_recv_cmd: received message from %s", + ORTE_NAME_PRINT(orte_process_info.my_name), + ORTE_NAME_PRINT(sender)); } /* process the command */ @@ -758,8 +758,8 @@ static int process_commands(orte_process_name_t* sender, for (n=0; n < num_jobs; n++) { if (orte_debug_daemons_flag) { - opal_output(0, "[%lu,%lu,%lu] orted_cmd: received kill_local_procs for job %ld", - ORTE_NAME_ARGS(orte_process_info.my_name), (long)jobs[n]); + opal_output(0, "%s orted_cmd: received kill_local_procs for job %ld", + ORTE_NAME_PRINT(orte_process_info.my_name), (long)jobs[n]); } if (ORTE_SUCCESS != (ret = orte_odls.kill_local_procs(jobs[n], true))) { @@ -772,8 +772,8 @@ static int process_commands(orte_process_name_t* sender, /**** SIGNAL_LOCAL_PROCS ****/ case ORTE_DAEMON_SIGNAL_LOCAL_PROCS: if (orte_debug_daemons_flag) { - opal_output(0, "[%lu,%lu,%lu] orted_cmd: received signal_local_procs", - ORTE_NAME_ARGS(orte_process_info.my_name)); + opal_output(0, "%s orted_cmd: received signal_local_procs", + ORTE_NAME_PRINT(orte_process_info.my_name)); } /* unpack the number of jobids */ n = 1; @@ -807,8 +807,8 @@ static int process_commands(orte_process_name_t* sender, /**** ADD_LOCAL_PROCS ****/ case ORTE_DAEMON_ADD_LOCAL_PROCS: if (orte_debug_daemons_flag) { - opal_output(0, "[%lu,%lu,%lu] orted_cmd: received add_local_procs", - ORTE_NAME_ARGS(orte_process_info.my_name)); + opal_output(0, "%s orted_cmd: received add_local_procs", + ORTE_NAME_PRINT(orte_process_info.my_name)); } /* unpack the notify data object */ n = 1; @@ -829,8 +829,8 @@ static int process_commands(orte_process_name_t* sender, /**** 
DELIVER A MESSAGE TO THE LOCAL PROCS ****/ case ORTE_DAEMON_MESSAGE_LOCAL_PROCS: if (orte_debug_daemons_flag) { - opal_output(0, "[%lu,%lu,%lu] orted_cmd: received message_local_procs", - ORTE_NAME_ARGS(orte_process_info.my_name)); + opal_output(0, "%s orted_cmd: received message_local_procs", + ORTE_NAME_PRINT(orte_process_info.my_name)); } /* unpack the jobid of the procs that are to receive the message */ @@ -932,8 +932,8 @@ static int process_commands(orte_process_name_t* sender, * the same as an exit_vm "hard kill" command */ if (orte_debug_daemons_flag) { - opal_output(0, "[%lu,%lu,%lu] orted_cmd: received exit", - ORTE_NAME_ARGS(orte_process_info.my_name)); + opal_output(0, "%s orted_cmd: received exit", + ORTE_NAME_PRINT(orte_process_info.my_name)); } /* no response to send here - we'll send it when nearly exit'd */ orted_globals.exit_condition = true; @@ -948,8 +948,8 @@ static int process_commands(orte_process_name_t* sender, /**** HALT VM COMMAND ****/ case ORTE_DAEMON_HALT_VM_CMD: if (orte_debug_daemons_flag) { - opal_output(0, "[%lu,%lu,%lu] orted_cmd: received halt vm", - ORTE_NAME_ARGS(orte_process_info.my_name)); + opal_output(0, "%s orted_cmd: received halt vm", + ORTE_NAME_PRINT(orte_process_info.my_name)); } /* if we are the HNP, then terminate all orteds reporting to us */ if (orte_process_info.seed) { @@ -972,8 +972,8 @@ static int process_commands(orte_process_name_t* sender, /**** CONTACT QUERY COMMAND ****/ case ORTE_DAEMON_CONTACT_QUERY_CMD: if (orte_debug_daemons_flag) { - opal_output(0, "[%lu,%lu,%lu] orted_cmd: received contact query", - ORTE_NAME_ARGS(orte_process_info.my_name)); + opal_output(0, "%s orted_cmd: received contact query", + ORTE_NAME_PRINT(orte_process_info.my_name)); } /* send back contact info */ contact_info = orte_rml.get_contact_info(); @@ -1021,8 +1021,8 @@ static int process_commands(orte_process_name_t* sender, case ORTE_DAEMON_WARMUP_LOCAL_CONN: /* nothing to do here - just ignore it */ if 
(orte_debug_daemons_flag) { - opal_output(0, "[%lu,%lu,%lu] orted_recv: received connection from local proc", - ORTE_NAME_ARGS(orte_process_info.my_name)); + opal_output(0, "%s orted_recv: received connection from local proc", + ORTE_NAME_PRINT(orte_process_info.my_name)); } ret = ORTE_SUCCESS; break; @@ -1101,7 +1101,6 @@ static int binomial_route_msg(orte_process_name_t *sender, hibit = opal_hibit(rank, bitmap); --bitmap; - target.cellid = ORTE_PROC_MY_NAME->cellid; target.jobid = 0; for (i = hibit + 1, mask = 1 << i; i <= bitmap; ++i, mask <<= 1) { peer = rank | mask; diff --git a/orte/runtime/Makefile.am b/orte/runtime/Makefile.am index c2ececb2cc..607afd1b1a 100644 --- a/orte/runtime/Makefile.am +++ b/orte/runtime/Makefile.am @@ -28,7 +28,6 @@ headers += \ runtime/runtime_internal.h \ runtime/runtime_types.h \ runtime/params.h \ - runtime/orte_setup_hnp.h \ runtime/orte_cr.h libopen_rte_la_SOURCES += \ @@ -40,7 +39,6 @@ libopen_rte_la_SOURCES += \ runtime/orte_init_stage2.c \ runtime/orte_monitor.c \ runtime/orte_restart.c \ - runtime/orte_setup_hnp.c \ runtime/orte_system_finalize.c \ runtime/orte_system_init.c \ runtime/orte_universe_exists.c \ diff --git a/orte/runtime/orte_init_stage1.c b/orte/runtime/orte_init_stage1.c index e71044658e..88143e58d7 100644 --- a/orte/runtime/orte_init_stage1.c +++ b/orte/runtime/orte_init_stage1.c @@ -426,10 +426,10 @@ int orte_init_stage1(bool infrastructure) error = "orte_ns.get_vpid_string"; goto error; } - + if (orte_debug_flag) { - opal_output(0, "[%lu,%lu,%lu] setting up session dir with", - ORTE_NAME_ARGS(orte_process_info.my_name)); + opal_output(0, "%s setting up session dir with", + ORTE_NAME_PRINT(orte_process_info.my_name)); if (NULL != orte_process_info.tmpdir_base) { opal_output(0, "\ttmpdir %s", orte_process_info.tmpdir_base); } @@ -469,16 +469,16 @@ int orte_init_stage1(bool infrastructure) contact_path = opal_os_path(false, orte_process_info.universe_session_dir, "universe-setup.txt", NULL); if 
(orte_debug_flag) { - opal_output(0, "[%lu,%lu,%lu] contact_file %s", - ORTE_NAME_ARGS(orte_process_info.my_name), contact_path); + opal_output(0, "%s contact_file %s", + ORTE_NAME_PRINT(orte_process_info.my_name), contact_path); } if (ORTE_SUCCESS != (ret = orte_write_universe_setup_file(contact_path, &orte_universe_info))) { if (orte_debug_flag) { - opal_output(0, "[%lu,%lu,%lu] couldn't write setup file", ORTE_NAME_ARGS(orte_process_info.my_name)); + opal_output(0, "%s couldn't write setup file", ORTE_NAME_PRINT(orte_process_info.my_name)); } } else if (orte_debug_flag) { - opal_output(0, "[%lu,%lu,%lu] wrote setup file", ORTE_NAME_ARGS(orte_process_info.my_name)); + opal_output(0, "%s wrote setup file", ORTE_NAME_PRINT(orte_process_info.my_name)); } free(contact_path); } @@ -630,7 +630,7 @@ int orte_init_stage1(bool infrastructure) if (ORTE_SUCCESS != (ret = orte_gpr.create_value(&(values[0]), ORTE_GPR_OVERWRITE|ORTE_GPR_TOKENS_AND, - segment, 8, 0))) { + segment, 7, 0))) { ORTE_ERROR_LOG(ret); error = "singleton could not create gpr value"; goto error; @@ -647,36 +647,30 @@ int orte_init_stage1(bool infrastructure) goto error; } - if (ORTE_SUCCESS != (ret = orte_gpr.create_keyval(&(values[0]->keyvals[2]), ORTE_CELLID_KEY, ORTE_CELLID, &(ORTE_PROC_MY_NAME->cellid)))) { + if (ORTE_SUCCESS != (ret = orte_gpr.create_keyval(&(values[0]->keyvals[2]), ORTE_NODE_NAME_KEY, ORTE_STRING, orte_system_info.nodename))) { + ORTE_ERROR_LOG(ret); + error = "singleton could not create keyval"; + goto error; + } + if (ORTE_SUCCESS != (ret = orte_gpr.create_keyval(&(values[0]->keyvals[3]), ORTE_PROC_APP_CONTEXT_KEY, ORTE_STD_CNTR, &zero))) { ORTE_ERROR_LOG(ret); error = "singleton could not create keyval"; goto error; } - if (ORTE_SUCCESS != (ret = orte_gpr.create_keyval(&(values[0]->keyvals[3]), ORTE_NODE_NAME_KEY, ORTE_STRING, orte_system_info.nodename))) { - ORTE_ERROR_LOG(ret); - error = "singleton could not create keyval"; - goto error; - } - if (ORTE_SUCCESS != (ret = 
orte_gpr.create_keyval(&(values[0]->keyvals[4]), ORTE_PROC_APP_CONTEXT_KEY, ORTE_STD_CNTR, &zero))) { + if (ORTE_SUCCESS != (ret = orte_gpr.create_keyval(&(values[0]->keyvals[4]), ORTE_PROC_STATE_KEY, ORTE_PROC_STATE, &init))) { ORTE_ERROR_LOG(ret); error = "singleton could not create keyval"; goto error; } - if (ORTE_SUCCESS != (ret = orte_gpr.create_keyval(&(values[0]->keyvals[5]), ORTE_PROC_STATE_KEY, ORTE_PROC_STATE, &init))) { + if (ORTE_SUCCESS != (ret = orte_gpr.create_keyval(&(values[0]->keyvals[5]), ORTE_PROC_LOCAL_RANK_KEY, ORTE_VPID, &lrank))) { ORTE_ERROR_LOG(ret); error = "singleton could not create keyval"; goto error; } - if (ORTE_SUCCESS != (ret = orte_gpr.create_keyval(&(values[0]->keyvals[6]), ORTE_PROC_LOCAL_RANK_KEY, ORTE_VPID, &lrank))) { - ORTE_ERROR_LOG(ret); - error = "singleton could not create keyval"; - goto error; - } - - if (ORTE_SUCCESS != (ret = orte_gpr.create_keyval(&(values[0]->keyvals[7]), ORTE_NODE_NUM_PROCS_KEY, ORTE_STD_CNTR, &one))) { + if (ORTE_SUCCESS != (ret = orte_gpr.create_keyval(&(values[0]->keyvals[6]), ORTE_NODE_NUM_PROCS_KEY, ORTE_STD_CNTR, &one))) { ORTE_ERROR_LOG(ret); error = "singleton could not create keyval"; goto error; diff --git a/orte/runtime/orte_monitor.c b/orte/runtime/orte_monitor.c index 9a9d44605f..e7c8c4914b 100644 --- a/orte/runtime/orte_monitor.c +++ b/orte/runtime/orte_monitor.c @@ -51,8 +51,8 @@ static bool ompi_rte_waiting = false; void orte_all_procs_registered(orte_gpr_notify_message_t* match, void* cbdata) { if (orte_debug_flag) { - opal_output(0, "[%lu,%lu,%lu] all procs registered", - ORTE_NAME_ARGS(orte_process_info.my_name)); + opal_output(0, "%s all procs registered", + ORTE_NAME_PRINT(orte_process_info.my_name)); } OPAL_THREAD_LOCK(&ompi_rte_mutex); diff --git a/orte/runtime/orte_restart.c b/orte/runtime/orte_restart.c index 17a81f7bae..ceeb03386f 100644 --- a/orte/runtime/orte_restart.c +++ b/orte/runtime/orte_restart.c @@ -177,8 +177,8 @@ int orte_restart(orte_process_name_t *name, 
const char* uri) } if (orte_debug_flag) { - opal_output(0, "[%lu,%lu,%lu] setting up session dir with", - ORTE_NAME_ARGS(orte_process_info.my_name)); + opal_output(0, "%s setting up session dir with", + ORTE_NAME_PRINT(orte_process_info.my_name)); if (NULL != orte_process_info.tmpdir_base) { opal_output(0, "\ttmpdir %s", orte_process_info.tmpdir_base); } diff --git a/orte/runtime/orte_setup_hnp.c b/orte/runtime/orte_setup_hnp.c deleted file mode 100644 index 08182eacd9..0000000000 --- a/orte/runtime/orte_setup_hnp.c +++ /dev/null @@ -1,579 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * Copyright (c) 2007 Los Alamos National Security, LLC. All rights - * reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/** - * @file - * - * Establish a Head Node Process on a cluster's front end - */ - - -#include "orte_config.h" - -#include -#ifdef HAVE_UNISTD_H -#include -#endif -#include -#include -#include -#include -#ifdef HAVE_SYS_WAIT_H -#include -#endif -#include - -#include "orte/orte_constants.h" - -#include "opal/event/event.h" -#include "opal/threads/mutex.h" -#include "opal/threads/condition.h" -#include "opal/util/argv.h" -#include "opal/util/opal_environ.h" -#include "opal/util/output.h" -#include "opal/util/path.h" -#include "opal/util/os_path.h" -#include "opal/mca/base/mca_base_param.h" - -#include "orte/dss/dss.h" -#include "orte/runtime/orte_wait.h" -#include "orte/util/univ_info.h" -#include "orte/util/sys_info.h" -#include "orte/util/proc_info.h" -#include "orte/util/session_dir.h" -#include "orte/util/universe_setup_file_io.h" -#include "orte/mca/smr/smr.h" -#include "orte/mca/rml/rml.h" -#include "orte/mca/rds/rds_types.h" -#include "orte/mca/ns/ns.h" -#include "orte/mca/gpr/gpr.h" -#include "orte/mca/errmgr/errmgr.h" -#include "orte/runtime/runtime.h" -#include "orte/runtime/orte_setup_hnp.h" - -/* Local condition variables and mutex - */ -static opal_mutex_t orte_setup_hnp_mutex; -static opal_condition_t orte_setup_hnp_condition; -/* Local return code */ -static int orte_setup_hnp_rc; -/* Local uri storage */ -static char *orte_setup_hnp_orted_uri; - -static orte_setup_hnp_cb_data_t orte_setup_hnp_cbdata; - -/* - * NON-BLOCKING RECEIVER - */ -static void orte_setup_hnp_recv(int status, orte_process_name_t* sender, - orte_buffer_t* buffer, orte_rml_tag_t tag, - void* cbdata); - -/* - * PID WAIT CALLBACK - */ -static void orte_setup_hnp_wait(pid_t wpid, int status, void *data); - - -/* - * ORTE_SETUP_HNP - */ -int orte_setup_hnp(char *target_cluster, char *headnode, char *username) -{ - char **argv, *param, *uri, *uid, *hn=NULL; - char *path, *name_string, *orteprobe; - int 
argc, rc=ORTE_SUCCESS, id, intparam; - pid_t pid; - bool can_launch=false, on_gpr=false; - orte_cellid_t cellid=ORTE_CELLID_MAX, *cptr; - orte_jobid_t jobid; - orte_vpid_t vpid; - orte_std_cntr_t i, j, k, cnt=0; - orte_gpr_value_t **values=NULL, *value; - orte_gpr_keyval_t **keyvals; - char *keys[4], *tokens[3], *cellname; - struct timeval tv; - struct timespec ts; - bool *bptr, tf_flag; - - /* get the nodename for the headnode of the target cluster */ - if (NULL == headnode) { /* not provided, so try to look it up */ - tokens[0] = target_cluster; - tokens[1] = NULL; - keys[0] = ORTE_RDS_FE_NAME; - keys[1] = ORTE_RDS_FE_SSH; - keys[2] = ORTE_CELLID_KEY; - keys[3] = NULL; - if (ORTE_SUCCESS != (rc = orte_gpr.get(ORTE_GPR_TOKENS_OR | ORTE_GPR_KEYS_OR, - ORTE_RESOURCE_SEGMENT, - tokens, keys, &cnt, &values))) { - ORTE_ERROR_LOG(rc); - return rc; - } - if (0 == cnt || 0 == values[0]->cnt) { /* nothing found */ - goto MOVEON; - } - on_gpr = true; - /* need to decide what to do if more than value found. Some - * clusters have more than one head node, so which one do - * we choose? For now, just take the first one returned. 
- */ - keyvals = values[0]->keyvals; - for (i=0; i < values[0]->cnt; i++) { - if (0 == strcmp(keyvals[i]->key, ORTE_RDS_FE_NAME)) { - hn = strdup((const char*)keyvals[i]->value->data); - continue; - } - if (0 == strcmp(keyvals[i]->key, ORTE_RDS_FE_SSH)) { - if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&bptr, keyvals[i]->value, ORTE_BOOL))) { - ORTE_ERROR_LOG(rc); - return rc; - } - can_launch = *bptr; - continue; - } - if (0 == strcmp(keyvals[i]->key, ORTE_CELLID_KEY)) { - if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&cptr, keyvals[i]->value, ORTE_CELLID))) { - ORTE_ERROR_LOG(rc); - return rc; - } - cellid = *cptr; - continue; - } - } - goto MOVEON; - - } else { /* lookup the headnode's cellid */ - hn = strdup(headnode); - keys[0] = ORTE_RDS_FE_NAME; - keys[1] = ORTE_RDS_FE_SSH; - keys[2] = ORTE_CELLID_KEY; - keys[3] = NULL; - - rc = orte_gpr.get(ORTE_GPR_TOKENS_OR | ORTE_GPR_KEYS_OR, - ORTE_RESOURCE_SEGMENT, - NULL, keys, &cnt, &values); - if (ORTE_SUCCESS != rc) { - ORTE_ERROR_LOG(rc); - return rc; - } - /* Nothing found */ - if (0 == cnt || 0 == values[0]->cnt) { - goto MOVEON; - } - - on_gpr = true; - for (i=0; i < cnt; i++) { - keyvals = values[i]->keyvals; - for (j=0; j < values[i]->cnt; j++) { - if ((0 == strcmp(keyvals[j]->key, ORTE_RDS_FE_NAME)) && - 0 == strcmp((const char*)keyvals[j]->value->data, headnode)) { - /* okay, this is the right cell - now need to find - * the ssh flag (if provided) and cellid - */ - for (k=0; k < values[i]->cnt; k++) { - if (0 == strcmp(keyvals[k]->key, ORTE_RDS_FE_SSH)) { - if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&bptr, keyvals[i]->value, ORTE_BOOL))) { - ORTE_ERROR_LOG(rc); - return rc; - } - can_launch = *bptr; - continue; - } - if (0 == strcmp(keyvals[k]->key, ORTE_CELLID_KEY)) { - if (ORTE_SUCCESS != (rc = orte_dss.get((void**)&cptr, keyvals[i]->value, ORTE_CELLID))) { - ORTE_ERROR_LOG(rc); - return rc; - } - cellid = *cptr; - continue; - } - } - goto MOVEON; - } - } - } - } - -MOVEON: - if (NULL != values) { - 
for (i=0; i < cnt; i++) - OBJ_RELEASE(values[i]); - free(values); - } - - if (!on_gpr && (NULL != target_cluster || NULL != headnode)) { - /* if we couldn't find anything about this cell on the gpr, then - * we need to put the required headnode data on the registry. We need - * it to be there so other functions/processes can find it, if needed. - * User must provide either a target_cluster name (which then must be - * synonymous with the headnode name), a headnode name (on a named or - * unnamed target_cluster), or both. - */ - - /* get new cellid for this site/resource */ - if (NULL != target_cluster) { - cellname = strdup(target_cluster); - } else { - /* if the target_cluster was NULL, then headnode CAN'T be NULL - * or else we wouldn't get here - */ - cellname = strdup(headnode); - } - - /* can't know the site name, so it becomes "unknown" */ - rc = orte_ns.create_cellid(&cellid, "unknown", cellname); - if (ORTE_SUCCESS != rc ) { - ORTE_ERROR_LOG(rc); - free(cellname); - return rc; - } - - /* - * Store the cell info on the resource segment of the registry - */ - if (ORTE_SUCCESS != (rc = orte_gpr.create_value(&value, ORTE_GPR_TOKENS_XAND | ORTE_GPR_KEYS_OR, - ORTE_RESOURCE_SEGMENT, 4, 0))) { - ORTE_ERROR_LOG(rc); - return rc; - } - - rc = orte_schema.get_node_tokens(&(value->tokens), &(value->num_tokens), cellid, cellname); - if (ORTE_SUCCESS != rc) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(value); - return rc; - } - - /* Set Cell Name */ - if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[0]), ORTE_RDS_NAME, ORTE_STRING, cellname))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(value); - return rc; - } - - /* Set Cell ID */ - if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[1]), ORTE_CELLID_KEY, ORTE_CELLID, &cellid))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(value); - return rc; - } - - /* Set Front End Name */ - if (NULL == headnode) { - if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[2]), ORTE_RDS_FE_NAME, ORTE_STRING, 
cellname))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(value); - return rc; - } - } else { - if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[2]), ORTE_RDS_FE_NAME, ORTE_STRING, headnode))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(value); - return rc; - } - } - - /* Asssume ability to ssh to front end node*/ - tf_flag = true; - if (ORTE_SUCCESS != (rc = orte_gpr.create_keyval(&(value->keyvals[3]), ORTE_RDS_FE_SSH, ORTE_BOOL, &tf_flag))) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(value); - return rc; - } - - /* Place value in GPR */ - rc = orte_gpr.put(1, &value); - if (ORTE_SUCCESS != rc) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(value); - return rc; - } - - OBJ_RELEASE(value); - free(cellname); - - can_launch = true; - } - - if (!can_launch || ORTE_CELLID_MAX == cellid) { - return ORTE_ERR_UNREACH; - } - - /* get the user's name on the headnode */ - if (NULL == username) { - uid = strdup(orte_system_info.user); - } else { - uid = strdup(username); - } - - /* SETUP TO LAUNCH PROBE */ - - /* setup the conditioned wait and mutex variables */ - OBJ_CONSTRUCT(&orte_setup_hnp_mutex, opal_mutex_t); - OBJ_CONSTRUCT(&orte_setup_hnp_condition, opal_condition_t); - - /* get a jobid for the probe */ - rc = orte_ns.create_jobid(&jobid, NULL); - if (ORTE_SUCCESS != rc) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* get a vpid for the probe */ - rc = orte_ns.reserve_range(jobid, 1, &vpid); - if (ORTE_SUCCESS != rc ) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* initialize probe's process name... 
*/ - rc = orte_ns.create_process_name(&(orte_setup_hnp_cbdata.name), cellid, jobid, vpid); - if (ORTE_SUCCESS != rc) { - ORTE_ERROR_LOG(rc); - return rc; - } - - /* ...and get string representation */ - rc = orte_ns.get_proc_name_string(&name_string, orte_setup_hnp_cbdata.name); - if (ORTE_SUCCESS != rc ) { - ORTE_ERROR_LOG(rc); - goto CLEANUP; - } - - /* setup callback data on sigchild */ - if (NULL != target_cluster) { - orte_setup_hnp_cbdata.target_cluster = strdup(target_cluster); - } else { - orte_setup_hnp_cbdata.target_cluster = NULL; - } - - orte_setup_hnp_cbdata.headnode = strdup(headnode); - orte_setup_hnp_cbdata.jobid = jobid; - - /* get name of probe application - just in case user specified something different */ - id = mca_base_param_register_string("orteprobe",NULL,NULL,NULL,"orteprobe"); - mca_base_param_lookup_string(id, &orteprobe); - - /* get rsh/ssh launch mechanism parameters */ - id = mca_base_param_register_string("pls","rsh","agent",NULL,"ssh"); - mca_base_param_lookup_string(id, ¶m); - - /* Initialize the argv array */ - argv = opal_argv_split(param, ' '); - argc = opal_argv_count(argv); - if (argc <= 0) { - ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM); - rc = ORTE_ERR_BAD_PARAM; - goto CLEANUP; - } - free(param); - - /* setup the path */ - path = opal_path_findv(argv[0], 0, environ, NULL); - - /* add the username and nodename */ - opal_argv_append(&argc, &argv, "-l"); - opal_argv_append(&argc, &argv, uid); - opal_argv_append(&argc, &argv, hn); - - /* add the probe application */ - opal_argv_append(&argc, &argv, orteprobe); - - /* tell the probe it's name */ - opal_argv_append(&argc, &argv, "--name"); - opal_argv_append(&argc, &argv, name_string); - - /* setup probe's ns contact info */ - opal_argv_append(&argc, &argv, "--nsreplica"); - if(NULL != orte_process_info.ns_replica_uri) { - uri = strdup(orte_process_info.ns_replica_uri); - } else { - uri = orte_rml.get_contact_info(); - } - asprintf(¶m, "\"%s\"", uri); - opal_argv_append(&argc, &argv, 
param); - free(param); - free(uri); - - /* setup probe's gpr contact info */ - opal_argv_append(&argc, &argv, "--gprreplica"); - if(NULL != orte_process_info.gpr_replica_uri) { - uri = strdup(orte_process_info.gpr_replica_uri); - } else { - uri = orte_rml.get_contact_info(); - } - asprintf(¶m, "\"%s\"", uri); - opal_argv_append(&argc, &argv, param); - free(param); - free(uri); - - /* tell the probe who to report to */ - uri = orte_rml.get_contact_info(); - asprintf(¶m, "\"%s\"", uri); - opal_argv_append(&argc, &argv, "--requestor"); - opal_argv_append(&argc, &argv, param); - free(param); - free(uri); - - /* pass along any parameters for the head node process - * in case one needs to be created - */ - id = mca_base_param_register_string("scope",NULL,NULL,NULL,"public"); - mca_base_param_lookup_string(id, ¶m); - opal_argv_append(&argc, &argv, "--scope"); - opal_argv_append(&argc, &argv, param); - free(param); - - id = mca_base_param_register_int("persistent",NULL,NULL,NULL,(int)false); - mca_base_param_lookup_int(id, &intparam); - if (intparam) { - opal_argv_append(&argc, &argv, "--persistent"); - } - - /* issue the non-blocking recv to get the probe's findings */ - rc = orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_PROBE, - 0, orte_setup_hnp_recv, NULL); - if(rc < 0) { - ORTE_ERROR_LOG(rc); - goto CLEANUP; - } - -#ifndef __WINDOWS__ - /* fork a child to exec the rsh/ssh session */ - orte_setup_hnp_rc = ORTE_SUCCESS; - pid = fork(); - if (pid < 0) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - rc = ORTE_ERR_OUT_OF_RESOURCE; - goto CLEANUP; - } - - if (pid == 0) { /* child */ - /* exec the probe launch */ - execv(path, argv); - ORTE_ERROR_LOG(ORTE_ERROR); - opal_output(0, "orte_setup_hnp: execv failed with errno=%d\n", errno); - return ORTE_ERROR; - - } else { /* parent */ - orte_wait_cb(pid, orte_setup_hnp_wait, &orte_setup_hnp_cbdata); - - /* block until a timeout occurs or probe dies/calls back */ - gettimeofday(&tv, NULL); - ts.tv_sec = tv.tv_sec + 
1000000; - ts.tv_nsec = 0; - - OPAL_THREAD_LOCK(&orte_setup_hnp_mutex); - opal_condition_timedwait(&orte_setup_hnp_condition, &orte_setup_hnp_mutex, &ts); - OPAL_THREAD_UNLOCK(&orte_setup_hnp_mutex); - - if (ORTE_SUCCESS == orte_setup_hnp_rc) { - /* need to restart the local system so it can connect to the remote daemon. */ - if (ORTE_SUCCESS != (rc = orte_restart(orte_setup_hnp_cbdata.name, orte_setup_hnp_orted_uri))) { - /** can't use ORTE_ERROR_LOG here as it may no longer be valid. Since we may - * have gotten part way through the shutdown/restart process, we can't have - * any idea of our current state - all we can really do at this point is - * abort - */ - fprintf(stderr, "orte_setup_hnp: aborted during restart of local process\n"); - } - - /* - * ...and we are now ready to go! - */ - return ORTE_SUCCESS; - } - - return orte_setup_hnp_rc; - } -#else - ORTE_ERROR_LOG(ORTE_ERROR); - opal_output(0, "This function has not been implemented in windows yet, file %s line %d\n", __FILE__, __LINE__); - abort(); -#endif - -CLEANUP: - return rc; -} - -static void orte_setup_hnp_recv(int status, orte_process_name_t* sender, - orte_buffer_t* buffer, orte_rml_tag_t tag, - void* cbdata) -{ - orte_std_cntr_t n=1; - int rc; - - OPAL_THREAD_LOCK(&orte_setup_hnp_mutex); - if (ORTE_SUCCESS != (rc = orte_dss.unpack(buffer, &orte_setup_hnp_orted_uri, &n, ORTE_STRING))) { - ORTE_ERROR_LOG(rc); - orte_setup_hnp_rc = rc; - opal_condition_signal(&orte_setup_hnp_condition); - OPAL_THREAD_UNLOCK(&orte_setup_hnp_mutex); - return; - } - orte_setup_hnp_rc = ORTE_SUCCESS; - opal_condition_signal(&orte_setup_hnp_condition); - OPAL_THREAD_UNLOCK(&orte_setup_hnp_mutex); -} - -static void orte_setup_hnp_wait(pid_t wpid, int status, void *cbdata) -{ - orte_setup_hnp_cb_data_t *data; - - OPAL_THREAD_LOCK(&orte_setup_hnp_mutex); - - data = (orte_setup_hnp_cb_data_t*)cbdata; - - /* if ssh exited abnormally, print something useful to the user and cleanup - * the registry entries for the HNP jobid. 
- This should somehow be pushed up to the calling level, but we - don't really have a way to do that just yet. - */ - if (! WIFEXITED(status) || ! WEXITSTATUS(status) == 0) { - /* tell the user something went wrong */ - opal_output(0, "ERROR: The probe on head node %s of the %s cluster failed to start as expected.", - data->headnode, data->target_cluster); - opal_output(0, "ERROR: There may be more information available from"); - opal_output(0, "ERROR: the remote shell (see above)."); - if (WIFEXITED(status)) { - opal_output(0, "ERROR: The probe exited unexpectedly with status %d.", - WEXITSTATUS(status)); - } else if (WIFSIGNALED(status)) { -#ifdef WCOREDUMP - if (WCOREDUMP(status)) { - opal_output(0, "The probe received a signal %d (with core).", - WTERMSIG(status)); - } else { - opal_output(0, "The probe received a signal %d.", WTERMSIG(status)); - } -#else - opal_output(0, "The probe received a signal %d.", WTERMSIG(status)); -#endif /* WCOREDUMP */ - } else { - opal_output(0, "No extra status information is available: %d.", status); - } - } - - opal_condition_signal(&orte_setup_hnp_condition); - OPAL_THREAD_UNLOCK(&orte_setup_hnp_mutex); - -} - diff --git a/orte/runtime/orte_setup_hnp.h b/orte/runtime/orte_setup_hnp.h deleted file mode 100644 index 6018911c8f..0000000000 --- a/orte/runtime/orte_setup_hnp.h +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2006 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -/** - * @file - * - * Establish a Head Node Process on a cluster's front end - */ - -#ifndef ORTE_SETUP_HNP_H -#define ORTE_SETUP_HNP_H - -/* - * Local data structure - */ -typedef struct { - char *target_cluster; - char *headnode; - orte_process_name_t *name; - orte_jobid_t jobid; -} orte_setup_hnp_cb_data_t; - - - /** - * Establish a Head Node Process on a cluster's front end - */ -ORTE_DECLSPEC int orte_setup_hnp(char *target_cluster, char *headnode, char *username); - -#endif diff --git a/orte/test/system/orte_abort.c b/orte/test/system/orte_abort.c index 660fe41be3..4844576718 100644 --- a/orte/test/system/orte_abort.c +++ b/orte/test/system/orte_abort.c @@ -27,7 +27,7 @@ int main(int argc, char* argv[]) pid = getpid(); gethostname(hostname, 500); - printf("orte_abort: Name [%lu,%lu,%lu] Host: %s Pid %ld\n", ORTE_NAME_ARGS(orte_process_info.my_name), + printf("orte_abort: Name %s Host: %s Pid %ld\n", ORTE_NAME_PRINT(orte_process_info.my_name), hostname, (long)pid); i = 0; diff --git a/orte/test/system/orte_nodename.c b/orte/test/system/orte_nodename.c index a00ffefe75..9c87364fdc 100644 --- a/orte/test/system/orte_nodename.c +++ b/orte/test/system/orte_nodename.c @@ -25,8 +25,8 @@ int main(int argc, char* argv[]) gethostname(hostname, 512); pid = getpid(); - printf("orte_nodename: Node %s Name [%lu,%lu,%lu] Pid %ld Local Rank: %ld Num_local_procs %ld\n", - hostname, ORTE_NAME_ARGS(orte_process_info.my_name), (long)pid, + printf("orte_nodename: Node %s Name %s Pid %ld Local Rank: %ld Num_local_procs %ld\n", + hostname, ORTE_NAME_PRINT(orte_process_info.my_name), (long)pid, (long)orte_process_info.local_rank, (long)orte_process_info.num_local_procs); orte_finalize(); diff --git a/orte/test/system/orte_ring.c b/orte/test/system/orte_ring.c index 804b42cad6..56f192722b 100644 --- a/orte/test/system/orte_ring.c +++ b/orte/test/system/orte_ring.c @@ -36,14 +36,12 @@ main(int argc, char 
*argv[]){ /* * Construct Peer name in a ring */ - right_peer_orte_name.cellid = orte_process_info.my_name->cellid; right_peer_orte_name.jobid = orte_process_info.my_name->jobid; right_peer_orte_name.vpid = orte_process_info.my_name->vpid + 1; if( right_peer_orte_name.vpid >= num_peers ) { right_peer_orte_name.vpid = 0; } - left_peer_orte_name.cellid = orte_process_info.my_name->cellid; left_peer_orte_name.jobid = orte_process_info.my_name->jobid; left_peer_orte_name.vpid = orte_process_info.my_name->vpid - 1; if( orte_process_info.my_name->vpid == 0 ) { diff --git a/orte/test/system/orte_spawn.c b/orte/test/system/orte_spawn.c index a96ae73478..f32528b0e3 100644 --- a/orte/test/system/orte_spawn.c +++ b/orte/test/system/orte_spawn.c @@ -128,14 +128,13 @@ int main(int argc, char* argv[]) /* send messages to all children - this will verify that we know their contact info */ orte_ns.get_vpid_range(job, &range); - name.cellid = ORTE_PROC_MY_NAME->cellid; name.jobid = job; i = 1; msg.iov_base = (void *) &i; msg.iov_len = sizeof(i); for (i=0; i < range; i++) { name.vpid = i; - fprintf(stderr, "Parent: sending message to child [%ld,%ld,%ld]\n", ORTE_NAME_ARGS(&name)); + fprintf(stderr, "Parent: sending message to child %s\n", ORTE_NAME_PRINT(&name)); if (0 > (rc = orte_rml.send(&name, &msg, 1, MY_TAG, 0))) { ORTE_ERROR_LOG(rc); } diff --git a/orte/test/system/orte_stage_gate.c b/orte/test/system/orte_stage_gate.c index f8ed1c0c23..176881711c 100644 --- a/orte/test/system/orte_stage_gate.c +++ b/orte/test/system/orte_stage_gate.c @@ -109,12 +109,12 @@ int main(int argc, char* argv[]) } gethostname(hostname, 512); - printf("orte_nodename: Node %s Name [%lu,%lu,%lu]\n", hostname, ORTE_NAME_ARGS(orte_process_info.my_name)); + printf("orte_nodename: Node %s Name %s\n", hostname, ORTE_NAME_PRINT(orte_process_info.my_name)); orte_finalize(); return 0; error: - opal_output(0, "[%lu,%lu,%lu]: %s", ORTE_NAME_ARGS(orte_process_info.my_name), error); + opal_output(0, "%s: %s", 
ORTE_NAME_PRINT(orte_process_info.my_name), error); return rc; } diff --git a/orte/test/system/spawn_child.c b/orte/test/system/spawn_child.c index 0f5f01ce4d..6ab65c2443 100644 --- a/orte/test/system/spawn_child.c +++ b/orte/test/system/spawn_child.c @@ -34,7 +34,7 @@ int main(int argc, char* argv[]) printf("error at line %d\n", __LINE__); } - printf("CHILD [%lu,%lu,%lu] Node %s Pid %ld\n", ORTE_NAME_ARGS(orte_process_info.my_name), hostname, (long)pid); + printf("CHILD %s Node %s Pid %ld\n", ORTE_NAME_PRINT(orte_process_info.my_name), hostname, (long)pid); orte_finalize(); return 0; diff --git a/orte/test/system/spin.c b/orte/test/system/spin.c index 4ec0119be4..11820c173e 100644 --- a/orte/test/system/spin.c +++ b/orte/test/system/spin.c @@ -25,7 +25,7 @@ int main(int argc, char* argv[]) } pid = getpid(); - printf("spin: Name [%lu,%lu,%lu] Pid %ld\n", ORTE_NAME_ARGS(orte_process_info.my_name), (long)pid); + printf("spin: Name %s Pid %ld\n", ORTE_NAME_PRINT(orte_process_info.my_name), (long)pid); i = 0; while (1) { diff --git a/orte/test/unit/dss/dss_cmp.c b/orte/test/unit/dss/dss_cmp.c index 59b0c93e6b..985cc4f7f6 100644 --- a/orte/test/unit/dss/dss_cmp.c +++ b/orte/test/unit/dss/dss_cmp.c @@ -75,7 +75,6 @@ int main (int argc, char* argv[]) orte_process_info.seed = true; orte_process_info.my_name = (orte_process_name_t*)malloc(sizeof(orte_process_name_t)); - orte_process_info.my_name->cellid = 0; orte_process_info.my_name->jobid = 0; orte_process_info.my_name->vpid = 0; diff --git a/orte/test/unit/dss/dss_copy.c b/orte/test/unit/dss/dss_copy.c index 32713c9746..4f09add28e 100644 --- a/orte/test/unit/dss/dss_copy.c +++ b/orte/test/unit/dss/dss_copy.c @@ -75,7 +75,6 @@ int main (int argc, char* argv[]) orte_process_info.seed = true; orte_process_info.my_name = (orte_process_name_t*)malloc(sizeof(orte_process_name_t)); - orte_process_info.my_name->cellid = 0; orte_process_info.my_name->jobid = 0; orte_process_info.my_name->vpid = 0; diff --git 
a/orte/test/unit/dss/dss_inc_dec.c b/orte/test/unit/dss/dss_inc_dec.c index 1c4e4ab06c..dce98a95dd 100644 --- a/orte/test/unit/dss/dss_inc_dec.c +++ b/orte/test/unit/dss/dss_inc_dec.c @@ -74,7 +74,6 @@ int main (int argc, char* argv[]) orte_process_info.seed = true; orte_process_info.my_name = (orte_process_name_t*)malloc(sizeof(orte_process_name_t)); - orte_process_info.my_name->cellid = 0; orte_process_info.my_name->jobid = 0; orte_process_info.my_name->vpid = 0; diff --git a/orte/test/unit/dss/dss_print.c b/orte/test/unit/dss/dss_print.c index 3f28853979..05c2784151 100644 --- a/orte/test/unit/dss/dss_print.c +++ b/orte/test/unit/dss/dss_print.c @@ -75,7 +75,6 @@ int main (int argc, char* argv[]) orte_process_info.seed = true; orte_process_info.my_name = (orte_process_name_t*)malloc(sizeof(orte_process_name_t)); - orte_process_info.my_name->cellid = 0; orte_process_info.my_name->jobid = 0; orte_process_info.my_name->vpid = 0; diff --git a/orte/test/unit/dss/dss_release.c b/orte/test/unit/dss/dss_release.c index cfa0ccbe7a..8b4f18ac37 100644 --- a/orte/test/unit/dss/dss_release.c +++ b/orte/test/unit/dss/dss_release.c @@ -77,7 +77,6 @@ int main (int argc, char* argv[]) orte_process_info.seed = true; orte_process_info.my_name = (orte_process_name_t*)malloc(sizeof(orte_process_name_t)); - orte_process_info.my_name->cellid = 0; orte_process_info.my_name->jobid = 0; orte_process_info.my_name->vpid = 0; diff --git a/orte/test/unit/dss/dss_set_get.c b/orte/test/unit/dss/dss_set_get.c index 1aa06ab8a0..8ee2cf93a9 100644 --- a/orte/test/unit/dss/dss_set_get.c +++ b/orte/test/unit/dss/dss_set_get.c @@ -74,7 +74,6 @@ int main (int argc, char* argv[]) orte_process_info.seed = true; orte_process_info.my_name = (orte_process_name_t*)malloc(sizeof(orte_process_name_t)); - orte_process_info.my_name->cellid = 0; orte_process_info.my_name->jobid = 0; orte_process_info.my_name->vpid = 0; diff --git a/orte/test/unit/dss/dss_size.c b/orte/test/unit/dss/dss_size.c index 
a7a7cd6609..277fc2a22d 100644 --- a/orte/test/unit/dss/dss_size.c +++ b/orte/test/unit/dss/dss_size.c @@ -75,7 +75,6 @@ int main (int argc, char* argv[]) orte_process_info.seed = true; orte_process_info.my_name = (orte_process_name_t*)malloc(sizeof(orte_process_name_t)); - orte_process_info.my_name->cellid = 0; orte_process_info.my_name->jobid = 0; orte_process_info.my_name->vpid = 0; diff --git a/orte/test/unit/gpr/gpr_dt_copy.c b/orte/test/unit/gpr/gpr_dt_copy.c index d3be3cdee5..3efb21120e 100644 --- a/orte/test/unit/gpr/gpr_dt_copy.c +++ b/orte/test/unit/gpr/gpr_dt_copy.c @@ -68,7 +68,6 @@ int main(int argc, char **argv) orte_process_info.seed = true; orte_process_info.my_name = (orte_process_name_t*)malloc(sizeof(orte_process_name_t)); - orte_process_info.my_name->cellid = 0; orte_process_info.my_name->jobid = 0; orte_process_info.my_name->vpid = 0; diff --git a/orte/test/unit/gpr/gpr_dt_print.c b/orte/test/unit/gpr/gpr_dt_print.c index 99e3b2a803..e0cee6626a 100644 --- a/orte/test/unit/gpr/gpr_dt_print.c +++ b/orte/test/unit/gpr/gpr_dt_print.c @@ -68,7 +68,6 @@ int main(int argc, char **argv) orte_process_info.seed = true; orte_process_info.my_name = (orte_process_name_t*)malloc(sizeof(orte_process_name_t)); - orte_process_info.my_name->cellid = 0; orte_process_info.my_name->jobid = 0; orte_process_info.my_name->vpid = 0; diff --git a/orte/test/unit/gpr/gpr_dt_release.c b/orte/test/unit/gpr/gpr_dt_release.c index e21fbc0f5b..cf146aff95 100644 --- a/orte/test/unit/gpr/gpr_dt_release.c +++ b/orte/test/unit/gpr/gpr_dt_release.c @@ -68,7 +68,6 @@ int main(int argc, char **argv) orte_process_info.seed = true; orte_process_info.my_name = (orte_process_name_t*)malloc(sizeof(orte_process_name_t)); - orte_process_info.my_name->cellid = 0; orte_process_info.my_name->jobid = 0; orte_process_info.my_name->vpid = 0; diff --git a/orte/test/unit/gpr/gpr_dt_size.c b/orte/test/unit/gpr/gpr_dt_size.c index 0a758960f8..dad61b8c85 100644 --- a/orte/test/unit/gpr/gpr_dt_size.c 
+++ b/orte/test/unit/gpr/gpr_dt_size.c @@ -68,7 +68,6 @@ int main(int argc, char **argv) orte_process_info.seed = true; orte_process_info.my_name = (orte_process_name_t*)malloc(sizeof(orte_process_name_t)); - orte_process_info.my_name->cellid = 0; orte_process_info.my_name->jobid = 0; orte_process_info.my_name->vpid = 0; diff --git a/orte/test/unit/ns/ns_peers.c b/orte/test/unit/ns/ns_peers.c index 0dafc419c1..6accc9b049 100644 --- a/orte/test/unit/ns/ns_peers.c +++ b/orte/test/unit/ns/ns_peers.c @@ -131,7 +131,7 @@ int main(int argc, char **argv) jptr = peers; for (j=0; j < npeers; j++) { - fprintf(stderr, "get peers local: peer %ld, %ld, %ld\n", ORTE_NAME_ARGS(jptr)); + fprintf(stderr, "get peers local: peer %s\n", ORTE_NAME_PRINT(jptr)); jptr++; } free(peers); @@ -146,7 +146,7 @@ int main(int argc, char **argv) jptr = peers; for (j=0; j < npeers; j++) { - fprintf(stderr, "get peers for job %ld: peer %ld, %ld, %ld\n", (long)jobs[1], ORTE_NAME_ARGS(jptr)); + fprintf(stderr, "get peers for job %ld: peer %s\n", (long)jobs[1], ORTE_NAME_PRINT(jptr)); jptr++; } if (NULL != peers) free(peers); @@ -164,7 +164,7 @@ int main(int argc, char **argv) jptr = peers; for (j=0; j < npeers; j++) { - fprintf(stderr, "get peers with descendants for job %ld: peer %ld, %ld, %ld\n", (long)parent, ORTE_NAME_ARGS(jptr)); + fprintf(stderr, "get peers with descendants for job %ld: peer %s\n", (long)parent, ORTE_NAME_PRINT(jptr)); jptr++; } if (NULL != peers) free(peers); @@ -182,7 +182,7 @@ int main(int argc, char **argv) jptr = peers; for (j=0; j < npeers; j++) { - fprintf(stderr, "get peers with children only for job %ld: peer %ld, %ld, %ld\n", (long)parent, ORTE_NAME_ARGS(jptr)); + fprintf(stderr, "get peers with children only for job %ld: peer %s\n", (long)parent, ORTE_NAME_PRINT(jptr)); jptr++; } if (NULL != peers) free(peers); diff --git a/orte/test/unit/ns/ns_string_fns.c b/orte/test/unit/ns/ns_string_fns.c index 5f0cfdf27d..6c8c524ca6 100644 --- a/orte/test/unit/ns/ns_string_fns.c 
+++ b/orte/test/unit/ns/ns_string_fns.c @@ -29,7 +29,6 @@ int main(int argc, char **argv) { orte_process_name_t *test_name; - orte_cellid_t cell; orte_jobid_t job; orte_vpid_t vpid; int i, j, rc; @@ -46,7 +45,7 @@ int main(int argc, char **argv) ORTE_ERROR_NAME(rc)); exit(1); } else { - fprintf(stderr, "got process name: %ld %ld %ld\n", ORTE_NAME_ARGS(test_name)); + fprintf(stderr, "got process name: %s\n", ORTE_NAME_PRINT(test_name)); } free(test_name); @@ -57,27 +56,11 @@ int main(int argc, char **argv) ORTE_ERROR_NAME(rc)); exit(1); } else { - fprintf(stderr, "got process name: %ld %ld %ld\n", ORTE_NAME_ARGS(test_name)); + fprintf(stderr, "got process name: %s\n", ORTE_NAME_PRINT(test_name)); } free(tmp); free(test_name); - /* create a cellid */ - if (ORTE_SUCCESS != (rc = orte_ns.create_cellid(&cell, "dummy-site", "dummy-resource"))) { /* got error */ - fprintf(stderr, "create cellid: error with error %s\n", ORTE_ERROR_NAME(rc)); - exit(1); - } else { - fprintf(stderr, "cellid created: %lu\n", (unsigned long) cell); - } - - /* get cellid info */ - if (ORTE_SUCCESS != (rc = orte_ns.get_cell_info(cell, &site, &resource))) { /* got error */ - fprintf(stderr, "get_cell_info: error with error %s\n", ORTE_ERROR_NAME(rc)); - exit(1); - } else { - fprintf(stderr, "get_cell_info: %lu %s %s\n", (unsigned long) cell, site, resource); - } - for (i=0; i<10; i++) { /* loop through */ /* create jobid */ if (ORTE_SUCCESS != (rc = orte_ns.create_jobid(&job, NULL))) { /* got error */ @@ -99,7 +82,7 @@ int main(int argc, char **argv) } /* create a name */ - if (ORTE_SUCCESS != (rc = orte_ns.create_process_name(&test_name, (orte_cellid_t)i, + if (ORTE_SUCCESS != (rc = orte_ns.create_process_name(&test_name, job, vpid))) { fprintf(stderr, "test_ns_replica: failed to create proc name after vpid range with error %s\n", ORTE_ERROR_NAME(rc)); @@ -131,15 +114,7 @@ int main(int argc, char **argv) fprintf(stderr, "(%d) strings: jobid - %s\n", i, tmp); } free(tmp); - if (ORTE_SUCCESS != 
(rc = orte_ns.get_cellid_string(&tmp, test_name))) { - fprintf(stderr, "test_ns_replica: failed to get cellid_string with error %s\n", - ORTE_ERROR_NAME(rc)); - exit(1); - } else { - fprintf(stderr, "(%d) strings: cellid - %s\n", i, tmp); - } - free(tmp); - + } } diff --git a/orte/tools/Makefile.am b/orte/tools/Makefile.am index f56fe0b8de..d0cd121464 100644 --- a/orte/tools/Makefile.am +++ b/orte/tools/Makefile.am @@ -21,7 +21,6 @@ # This makefile.am does not stand on its own - it is included from orte/Makefile.am SUBDIRS += \ - tools/console \ tools/orteboot \ tools/orted \ tools/ortehalt \ @@ -33,7 +32,6 @@ SUBDIRS += \ tools/orte-clean DIST_SUBDIRS += \ - tools/console \ tools/orteboot \ tools/orted \ tools/ortehalt \ diff --git a/orte/tools/console/Makefile.am b/orte/tools/console/Makefile.am deleted file mode 100644 index 89c01d6122..0000000000 --- a/orte/tools/console/Makefile.am +++ /dev/null @@ -1,45 +0,0 @@ -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. 
-# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# - -libs = \ - $(top_builddir)/orte/libopen-rte.la - -headers = orteconsole.h - -if OMPI_INSTALL_BINARIES - -dist_pkgdata_DATA = help-orteconsole.txt - -bin_PROGRAMS = \ - orteconsole - -endif - -orteconsole_SOURCES = \ - $(headers) \ - orteconsole.c - -orteconsole_LDADD = $(libs) -orteconsole_DEPENDENCIES = $(libs) - -if WANT_INSTALL_HEADERS -ortedir = $(includedir)/openmpi/orte/tools/orteconsole -orte_HEADERS = $(headers) -else -ortedir = $(includedir) -endif diff --git a/orte/tools/console/help-orteconsole.txt b/orte/tools/console/help-orteconsole.txt deleted file mode 100644 index 153860c276..0000000000 --- a/orte/tools/console/help-orteconsole.txt +++ /dev/null @@ -1,56 +0,0 @@ -# -*- text -*- -# -# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana -# University Research and Technology -# Corporation. All rights reserved. -# Copyright (c) 2004-2005 The University of Tennessee and The University -# of Tennessee Research Foundation. All rights -# reserved. -# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, -# University of Stuttgart. All rights reserved. -# Copyright (c) 2004-2005 The Regents of the University of California. -# All rights reserved. -# $COPYRIGHT$ -# -# Additional copyrights may follow -# -# $HEADER$ -# -# This is the US/English general help file for Open RTE's Console. -# -[orteconsole:splash-screen] - -Welcome to Open RTE console - -Type 'help' for assistance. - -[orteconsole:failed-boot] -Open RTE was unable to lauch daemons on the specified list of machines. -Returned value %d instead of ORTE_SUCCESS. -[orteconsole:usage] -Usage: %s [OPTION]... -Start the Open RTE Console interface - -%s -[orteconsole:invalid-num-arguments] -Open RTE was unable to execute the %s command. This command requires -at least %d arguments and only %d were provided. See 'help %s' for -usage information. 
-[orteconsole:init-failure] -Open RTE was unable to initialize properly. The error occured while -attempting to %s. Returned value %d instead of ORTE_SUCCESS. -[orteconsole:finalize-failure] -Open RTE was unable to finalize properly. The error occured while -attempting to %s. Returned value %d instead of ORTE_SUCCESS. -[orteconsole:unknown-command] -Open RTE Console did not recognize the command: - %s -[orteconsole:unimplemented-command] -The command "%s" is not currently implemented in the Open RTE Console. -[orteconsole:failed-command] -The command "%s" failed with return value %d. -[orteconsole:no-hosts] -Open RTE Console did not find any hosts. -[orteconsole:no-daemon-started] -Open RTE was unable to find a started daemon. Use the boot command -to start a daemon on you system. diff --git a/orte/tools/console/orteconsole.c b/orte/tools/console/orteconsole.c deleted file mode 100644 index 0741339827..0000000000 --- a/orte/tools/console/orteconsole.c +++ /dev/null @@ -1,1006 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. 
- * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ -/** @file **/ - -#include "orte_config.h" - -#include - -#include -#include -#ifdef HAVE_UNISTD_H -#include -#endif - -#include "orte/orte_constants.h" - -#include "opal/util/cmd_line.h" -#include "opal/util/argv.h" -#include "opal/class/opal_list.h" -#include "opal/util/output.h" -#include "opal/util/os_path.h" -#include "opal/util/show_help.h" -#include "opal/mca/base/base.h" - -#include "orte/dss/dss.h" -#include "orte/util/sys_info.h" -#include "orte/util/proc_info.h" -#include "orte/util/session_dir.h" -#include "orte/util/universe_setup_file_io.h" -#include "orte/runtime/runtime.h" -#include "orte/mca/rmgr/rmgr.h" -#include "orte/mca/errmgr/errmgr.h" -#include "orte/mca/rml/rml.h" -#include "orte/mca/ras/ras.h" -#include "orte/mca/rds/base/base.h" -#include "orte/mca/ns/ns.h" -#include "orte/mca/gpr/gpr.h" -#include "orte/mca/pls/base/base.h" -#include "orte/runtime/orte_setup_hnp.h" - -#include "orte/tools/console/orteconsole.h" - -/* - * Global Variables - */ -static bool exit_cmd; -static bool daemon_is_active; - -/* - * Globals for catching command line options - */ -orte_console_globals_t orte_console_globals; - -opal_cmd_line_init_t cmd_line_opts[] = { - { NULL, NULL, NULL, 'h', NULL, "help", 0, - &orte_console_globals.help, OPAL_CMD_LINE_TYPE_BOOL, - "This help message" }, - - /* A Hostfile */ - { "rds", "hostfile", "path", '\0', "hostfile", "hostfile", 1, - &orte_console_globals.hostfile, OPAL_CMD_LINE_TYPE_STRING, - "Provide a hostfile" }, - - { "rds", "hostfile", "path", '\0', "machinefile", "machinefile", 1, - &orte_console_globals.hostfile, OPAL_CMD_LINE_TYPE_STRING, - "Provide a hostfile" }, - - { "orte", "debug", NULL, 'd', NULL, "debug-devel", 0, - NULL, OPAL_CMD_LINE_TYPE_BOOL, - "Enable debugging of OpenRTE" }, - { "orte", "debug", "daemons", '\0', NULL, "debug-daemons", 0, - NULL, OPAL_CMD_LINE_TYPE_INT, - "Enable debugging of any OpenRTE daemons used by this 
application" }, - { "orte", "debug", "daemons_file", '\0', NULL, "debug-daemons-file", 0, - NULL, OPAL_CMD_LINE_TYPE_BOOL, - "Enable debugging of any OpenRTE daemons used by this application, storing output in files" }, - { "orte", "no_daemonize", NULL, '\0', NULL, "no-daemonize", 0, - NULL, OPAL_CMD_LINE_TYPE_BOOL, - "Do not detach OpenRTE daemons used by this application" }, - { "universe", NULL, NULL, '\0', NULL, "universe", 1, - NULL, OPAL_CMD_LINE_TYPE_STRING, - "Set the universe name as username@hostname:universe_name for this application" }, - { NULL, NULL, NULL, '\0', NULL, "tmpdir", 1, - &orte_process_info.tmpdir_base, OPAL_CMD_LINE_TYPE_STRING, - "Set the root for the session directory tree for orterun ONLY" }, - { "orte", "universe", "exist", '\0', NULL, NULL, (int)false, - NULL, OPAL_CMD_LINE_TYPE_BOOL, - "Report error if universe does not already exist" }, - - /* End of list */ - { NULL, NULL, NULL, '\0', NULL, NULL, 0, - NULL, OPAL_CMD_LINE_TYPE_NULL, - NULL } -}; - -/* - * Global structure describing valid internal commands - */ -orte_console_command_t console_commands[] = { - { "add", NULL, 1, ORTE_CONSOLE_TYPE_STD, - orte_console_add_host, - "add [ ...]", - "Add a host to the current universe" }, - - { "alias", NULL, 0, ORTE_CONSOLE_TYPE_HIDDEN, - orte_console_not_imp, - "alias [ ]", - "Alias command" }, - - { "boot", "b", 0, ORTE_CONSOLE_TYPE_STD, - orte_console_boot_daemons, - "boot [hostname] [username]", - "Launch Persistant Daemons. This will use the specifiec host or the first host added." 
}, - - { "clean", "cl", 0, ORTE_CONSOLE_TYPE_HIDDEN, - orte_console_not_imp, - "clean", - "Kill all jobs in the universe, preserving all daemons" }, - - { "conf", NULL, 0, ORTE_CONSOLE_TYPE_STD, - orte_console_display_configuration, - "conf [-a]", - "Diplay a list of the machines in the current universe" }, - - { "contactinfo", "ci", 0, ORTE_CONSOLE_TYPE_STD, - orte_console_contactinfo, - "contactinfo", - "Query Contact Information from Daemons" }, - - { "cwd", NULL, 0, ORTE_CONSOLE_TYPE_HIDDEN, - orte_console_not_imp, - "cwd [name]", - "Set or print the current working directory" }, - - { "delete", "del", 1, ORTE_CONSOLE_TYPE_STD, - orte_console_remove_host, - "delete [ ...]", - "Delete a host from the current universe" }, - - { "exit", "e", 0, ORTE_CONSOLE_TYPE_STD, - orte_console_exit, - "exit", - "Exit the console" }, - - { "expire", NULL, 1, ORTE_CONSOLE_TYPE_HIDDEN, - orte_console_not_imp, - "expire ", - "Expire process information" }, - - { "halt", NULL, 0, ORTE_CONSOLE_TYPE_STD, - orte_console_halt, - "halt", - "Halt virtual machine" }, - - { "haltall", NULL, 0, ORTE_CONSOLE_TYPE_HIDDEN, - orte_console_not_imp, - "haltall", - "Halt virtual machine and stop all services" }, - - { "help", "h", 0, ORTE_CONSOLE_TYPE_STD, - orte_console_help, - "help [command]", - "Print this display" }, - -#ifdef HAVE_READLINE - { "history", NULL, 0, ORTE_CONSOLE_TYPE_HIDDEN, - orte_console_not_imp, - "history", - "Display list of command history" }, -#endif - - { "kill", NULL, 1, ORTE_CONSOLE_TYPE_HIDDEN, - orte_console_not_imp, - "kill [ ...]", - "Terminate process(es)" }, - - { "killall", NULL, 1, ORTE_CONSOLE_TYPE_HIDDEN, - orte_console_not_imp, - "killall [ ...]", - "Terminate all process(es) in runID" }, - - { "mpispawn", NULL, 3, ORTE_CONSOLE_TYPE_HIDDEN, - orte_console_not_imp, - "mpispawn -np [ <-option name> [option argument] ] ", - "Spawn MPI process(es)" }, - - { "ps", NULL, 0, ORTE_CONSOLE_TYPE_STD, - orte_console_ps, - "ps", - "Display process(es) status" }, - - { 
"quit", "q", 0, ORTE_CONSOLE_TYPE_STD, - orte_console_exit, - "quit", - "Quit from console" }, - - { "reset", NULL, 0, ORTE_CONSOLE_TYPE_HIDDEN, - orte_console_not_imp, - "reset", - "Kill all tasks" }, - - { "service", NULL, 2, ORTE_CONSOLE_TYPE_HIDDEN, - orte_console_not_imp, - "service [service_name] [operation]", - "Service management" }, - - { "sig", NULL, 2, ORTE_CONSOLE_TYPE_HIDDEN, - orte_console_not_imp, - "sig [ ...]", - "Send signal to process(es)" }, - - { "sigall", NULL, 2, ORTE_CONSOLE_TYPE_HIDDEN, - orte_console_not_imp, - "sigall [ ...]", - "Send signal to all process(es) in runID" }, - - { "spawn", NULL, 3, ORTE_CONSOLE_TYPE_HIDDEN, - orte_console_not_imp, - "spawn -np [ <-option name> [option argument] ] ", - "Spawn process(es)" }, - - { "unalias", NULL, 1, ORTE_CONSOLE_TYPE_HIDDEN, - orte_console_not_imp, - "unalias ", - "unalias command" }, - - { "vmname", NULL, 0, ORTE_CONSOLE_TYPE_HIDDEN, - orte_console_not_imp, - "vmname [name]", - "Set or print the current virtual machine name" }, - - { "dump", NULL, 0, ORTE_CONSOLE_TYPE_STD, - orte_console_dump, - "dump [arg1 arg2]", - "Dump registry data - [all, segment, triggers, subscriptions] [segment_name]" }, - - /* End of list */ - { NULL, NULL, 0, ORTE_CONSOLE_TYPE_NULL, - NULL, - NULL, - NULL } -}; - -/* This should be added to opal_list.c ??? 
JJH */ -static int opal_list_clear(opal_list_t *list) { - opal_list_item_t *item; - - while ( NULL != (item = opal_list_remove_first(list) ) ) { - OBJ_RELEASE(item); - } - - return ORTE_SUCCESS; -} - -int main(int argc, char *argv[]) -{ - int ret=0; - opal_cmd_line_t *cmd_line; - char *usercmd; - orte_console_input_command_t input_command; - - /* - * Setup to check common command line options - */ - memset(&orte_console_globals, 0, sizeof(orte_console_globals_t)); - cmd_line = OBJ_NEW(opal_cmd_line_t); - opal_cmd_line_create(cmd_line, cmd_line_opts); - if (ORTE_SUCCESS != (ret = opal_cmd_line_parse(cmd_line, false, - argc, argv))) { - char *args = NULL; - args = opal_cmd_line_get_usage_msg(cmd_line); - opal_show_help("help-orteconsole.txt", "orteconsole:usage", false, - argv[0], args); - free(args); - return ret; - } - - /* Check for help request */ - if ( orte_console_globals.help ) { - char *args = NULL; - args = opal_cmd_line_get_usage_msg(cmd_line); - opal_show_help("help-orteconsole.txt", "orteconsole:usage", false, - argv[0], args); - free(args); - return 1; - } - - opal_show_help("help-orteconsole.txt", "orteconsole:splash-screen", false); - - /* - * Intialize the ORTE environment - */ - /* Set the flag telling orte_init that I am NOT a - * singleton, but am "infrastructure" - prevents setting - * up incorrect infrastructure that only a singleton would - * require - */ - daemon_is_active = true; - if (ORTE_SUCCESS != (ret = orte_init(ORTE_INFRASTRUCTURE, ORTE_NON_BARRIER)) ) { - if (ORTE_ERR_UNREACH == ret) { - opal_output(0, "Specified universe could not be reached - please ensure it has been started\n"); - return ret; - } else { - opal_show_help("help-orteconsole.txt", "orteconsole:init-failure", false, - "orte_init()", ret); - return ret; - } - } - if (orte_process_info.seed) { - daemon_is_active = false; - } - - /* - * Work Loop - */ - OBJ_CONSTRUCT(&orte_console_hosts, opal_list_t); - if (NULL != orte_ras.node_query) 
orte_ras.node_query(&orte_console_hosts); - - exit_cmd = false; - memset(&input_command, 0, sizeof(orte_console_input_command_t)); - while ( !exit_cmd ) { - usercmd = orte_console_get_input_line(); - if (NULL == usercmd || 0 >= strlen(usercmd) ) { - continue; - } - - orte_console_parse_command(usercmd, &input_command); - - orte_console_execute_command(input_command); - } - - OBJ_DESTRUCT(&orte_console_hosts); - - /* - * Finialize ORTE Environment - */ - if ( ORTE_SUCCESS != (ret = orte_finalize()) ) { - opal_show_help("help-orteconsole.txt", "orteconsole:finalize-failure", false, - "orte_finalize()", ret); - return ret; - } - - return ORTE_SUCCESS; -} - -static int command_cmp(char* user_command, orte_console_command_t system_command) { - - /* - * Check for Full Name Match - */ - if ( 0 == strncmp(user_command, system_command.cmd_full_name, - strlen(system_command.cmd_full_name)) ) { - return 0; - } - /* - * Check for Short Name Match - */ - else if ( ( NULL != system_command.cmd_short_name ) && - ( strlen(user_command) == strlen(system_command.cmd_short_name) ) && - ( 0 == strncmp(user_command, system_command.cmd_short_name, - strlen(system_command.cmd_short_name)) ) ) { - return 0; - } - - return -1; -} - -static int orte_console_execute_command(orte_console_input_command_t input_command) { - orte_console_command_t *cur_cmd; - int i, ret; - - for (i = 0; console_commands[i].cmd_type != ORTE_CONSOLE_TYPE_NULL; ++i) { - cur_cmd = &console_commands[i]; - - /* Check for matching command */ - if ( 0 == command_cmp(input_command.cmd_name, *cur_cmd) ){ - /* Check number of arguments */ - if (input_command.argc < (cur_cmd->cmd_args+1)) { - opal_show_help("help-orteconsole.txt", "orteconsole:invalid-num-arguments", false, - input_command.cmd_name, cur_cmd->cmd_args, - input_command.argc, cur_cmd->cmd_full_name); - return ORTE_ERROR; - } - - ret = cur_cmd->cmd_function(input_command); - - /* Check Return Codes */ - if ( ORTE_ERR_NOT_IMPLEMENTED == ret ) { - 
opal_show_help("help-orteconsole.txt", "orteconsole:unimplemented-command", false, - cur_cmd->cmd_full_name); - return ret; - } - else if ( ORTE_SUCCESS != ret ) { - opal_show_help("help-orteconsole.txt", "orteconsole:failed-command", false, - cur_cmd->cmd_full_name, ret); - return ret; - } - - break; - } - } - - /* - * If command was not found :( - */ - if ( ORTE_CONSOLE_TYPE_NULL == console_commands[i].cmd_type ) { - opal_show_help("help-orteconsole.txt", "orteconsole:unknown-command", false, - input_command.cmd_name); - return ORTE_ERR_NOT_IMPLEMENTED; - } - - return ORTE_SUCCESS; -} - -static int orte_console_parse_command(char * usercmd, orte_console_input_command_t *input_command){ - - input_command->argv = opal_argv_split(usercmd, ' '); - input_command->argc = opal_argv_count(input_command->argv); - input_command->cmd_name = strdup(input_command->argv[0]); - - return ORTE_SUCCESS; -} - -/* =========================== - * Actual Functionality below - * =========================== */ - -static int orte_console_not_imp(orte_console_input_command_t input_command) { - return ORTE_ERR_NOT_IMPLEMENTED; -} - -static int orte_console_dump(orte_console_input_command_t input_command) { - int i, j; - - if(daemon_is_active) { - if (NULL == input_command.argv[1]) { /** default to dump_all */ - orte_gpr.dump_all(); - } else if (strcmp(input_command.argv[1], "segment") == 0) { - if (2 < input_command.argc) { /** specific segment was requested */ - for (i=2; i < input_command.argc; i++) orte_gpr.dump_segment(input_command.argv[i]); - } else { /** nothing specific - dump them all */ - orte_gpr.dump_segment(NULL); - } - } else if (strcmp(input_command.argv[1], "trigger") == 0) { - if (2 < input_command.argc) { /** specific trigger was requested */ - j = strtol(input_command.argv[2], NULL, 10); - orte_gpr.dump_triggers(j); - } else { /** nothing specific - dump them all */ - orte_gpr.dump_triggers(0); - } - } else if (strcmp(input_command.argv[1], "subs") == 0) { - if (2 < 
input_command.argc) { /** specific subscription was requested */ - j = strtol(input_command.argv[2], NULL, 10); - orte_gpr.dump_subscriptions(j); - } else { /** nothing specific - dump them all */ - orte_gpr.dump_subscriptions(0); - } - } else if (strcmp(input_command.argv[1], "callbacks") == 0) { - orte_gpr.dump_callbacks(); - } else if (strcmp(input_command.argv[1], "cells") == 0) { - orte_ns.dump_cells(); - } else if (strcmp(input_command.argv[1], "jobs") == 0) { - orte_ns.dump_jobs(); - } else if (strcmp(input_command.argv[1], "tags") == 0) { - orte_ns.dump_tags(); - } else if (strcmp(input_command.argv[1], "datatypes") == 0) { - orte_ns.dump_datatypes(); - } else { - /** let user know that this isn't recognized */ - opal_output(0, "orteconsole: specified dump option not recognized\n"); - } - } else { - /** let user know that this isn't available */ - opal_output(0, "orteconsole: no daemon is active - dump cannot be executed\n"); - } - - return ORTE_SUCCESS; -} - -static int orte_console_ps(orte_console_input_command_t input_command) { - if(daemon_is_active) { - /** find the jobids in the system */ - /** for each jobid, get its status and output the info */ - /** no real way to do this right now - need the 2.0 interface - * so let's just punt for the moment - */ - orte_ns.dump_jobs(); - } else { - /** let user know that this isn't available */ - opal_output(0, "orteconsole: no daemon is active - ps cannot be executed\n"); - } - - return ORTE_SUCCESS; -} - -static int add_hosts_to_registry(opal_list_t *updates) { - orte_rds_cell_desc_t *rds_item; - orte_rds_cell_attr_t *new_attr; - orte_ras_node_t *ras_item; - opal_list_item_t *item; - opal_list_t rds_updates; - int ret; - orte_cellid_t local_cellid; - bool need_cellid = true; - - OBJ_CONSTRUCT(&rds_updates, opal_list_t); - - /* Convert RAS list to RDS list */ - for ( item = opal_list_get_first(updates); - item != opal_list_get_end( updates); - item = opal_list_get_next( item)) { - ras_item = (orte_ras_node_t *) 
item; - - rds_item = OBJ_NEW(orte_rds_cell_desc_t); - if (NULL == rds_item) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - - rds_item->site = strdup("Console"); - rds_item->name = strdup(ras_item->node_name); - - if(need_cellid) { -#if 0 /* JJH Repair when cellid's are fixed */ - /* Create a new cellid */ - ret = orte_ns.create_cellid(&local_cellid, rds_item->site, rds_item->name); - if (ORTE_SUCCESS != ret) { - ORTE_ERROR_LOG(ret); - return ret; - } -#else - local_cellid = 0; -#endif - } - rds_item->cellid = local_cellid; - ras_item->node_cellid = local_cellid; - - new_attr = OBJ_NEW(orte_rds_cell_attr_t); - if (NULL == new_attr) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - new_attr->keyval.value = OBJ_NEW(orte_data_value_t); - if (NULL == new_attr->keyval.value) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - new_attr->keyval.key = strdup(ORTE_RDS_NAME); - new_attr->keyval.value->type = ORTE_STRING; - new_attr->keyval.value->data = strdup(ras_item->node_name); - opal_list_append(&(rds_item->attributes), &new_attr->super); - - new_attr = OBJ_NEW(orte_rds_cell_attr_t); - if (NULL == new_attr) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - new_attr->keyval.value = OBJ_NEW(orte_data_value_t); - if (NULL == new_attr->keyval.value) { - ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE); - return ORTE_ERR_OUT_OF_RESOURCE; - } - new_attr->keyval.key = strdup(ORTE_CELLID_KEY); - new_attr->keyval.value->type = ORTE_CELLID; - if (ORTE_SUCCESS != (ret = orte_dss.copy((void**)&(new_attr->keyval.value->data), &(rds_item->cellid), ORTE_CELLID))) { - ORTE_ERROR_LOG(ret); - return ret; - } - opal_list_append(&(rds_item->attributes), &new_attr->super); - - opal_list_append(&rds_updates, &rds_item->super); - } - - /* Add the hosts to the registry */ - ret = orte_rds.store_resource(&rds_updates); - if (ORTE_SUCCESS != ret) { - return 
ret; - } - - ret = orte_ras.node_insert(updates); - if (ORTE_SUCCESS != ret ) { - return ret; - } - - opal_list_clear(&rds_updates); - OBJ_DESTRUCT(&rds_updates); - - return ORTE_SUCCESS; -} - -static int remove_hosts_from_registry(opal_list_t *updates) { - opal_list_t rds_updates; - /* int ret; */ - - OBJ_CONSTRUCT(&rds_updates, opal_list_t); - - /* Add the hosts to the registry */ -#if 0 /* This functionality needs to be written */ - orte_rds_base_convert_ras_to_rds(updates, &rds_updates); - - ret = orte_rds_base_node_delete(&rds_updates); - if (ORTE_SUCCESS != ret) { - return ret; - } - - ret = orte_ras.node_delete(updates); - if (ORTE_SUCCESS != ret ) { - return ret; - } -#endif - - opal_list_clear(&rds_updates); - OBJ_DESTRUCT(&rds_updates); - - return ORTE_SUCCESS; -} - - -static int orte_console_add_host(orte_console_input_command_t input_command) { - int i, ret; - orte_ras_node_t *tmp_host; - opal_list_t hosts_to_add; - - OBJ_CONSTRUCT(&hosts_to_add, opal_list_t); - - for(i = 1; i < input_command.argc; ++i) { - tmp_host = OBJ_NEW(orte_ras_node_t); - if (NULL == tmp_host) { - return ORTE_ERR_OUT_OF_RESOURCE; - } - - tmp_host->node_name = strdup(input_command.argv[i]); - tmp_host->node_arch = strdup("unknown"); - tmp_host->node_cellid = 0; /* JJH Repair when cellid's are fixed */ - tmp_host->node_slots_inuse = 0; - tmp_host->node_slots_max = 1; - tmp_host->node_slots = 1; - - if (daemon_is_active) { - opal_list_append(&hosts_to_add, &tmp_host->super); - } - else { - opal_list_append(&orte_console_hosts, &tmp_host->super); - } - - printf("Added Host: <%s>\n", input_command.argv[i]); - } - - if ( !opal_list_is_empty(&hosts_to_add) && daemon_is_active) { - /* - * If there is an active daemon, then add to the registry - */ - ret = add_hosts_to_registry(&hosts_to_add); - if (ORTE_SUCCESS != ret) { - return ret; - } - - /* Get a new list of registered hosts */ - opal_list_clear(&orte_console_hosts); - ret = orte_ras.node_query(&orte_console_hosts); - if 
(ORTE_SUCCESS != ret) { - return ret; - } - } - - opal_list_clear(&hosts_to_add); - OBJ_DESTRUCT(&hosts_to_add); - - return ORTE_SUCCESS; -} - -static int orte_console_remove_host(orte_console_input_command_t input_command) { - int i, ret; - orte_ras_node_t *tmp_host; - opal_list_item_t *item; - opal_list_t hosts_to_remove; - bool done; - - OBJ_CONSTRUCT(&hosts_to_remove, opal_list_t); - - for(i = 1; i < input_command.argc; ++i) { - done = false; - for (item = opal_list_get_first(&orte_console_hosts); - item != opal_list_get_end( &orte_console_hosts); - item = opal_list_get_next( item)) { - tmp_host = (orte_ras_node_t *)item; - - if (0 == strcmp(tmp_host->node_name, input_command.argv[i])) { - opal_list_remove_item(&orte_console_hosts, item); - opal_list_append(&hosts_to_remove, item); - done = true; - break; - } - } - if(!done) { - printf("Could not find host <%s>\n", input_command.argv[i]); - } - } - - if ( !opal_list_is_empty(&hosts_to_remove) && daemon_is_active) { - /* Delete hosts from registry */ - ret = remove_hosts_from_registry(&hosts_to_remove); - if (ORTE_SUCCESS != ret) { - return ret; - } - - /* Get a new list of registered hosts */ - opal_list_clear(&orte_console_hosts); - ret = orte_ras.node_query(&orte_console_hosts); - if (ORTE_SUCCESS != ret) { - return ret; - } - } - - opal_list_clear(&hosts_to_remove); - OBJ_DESTRUCT(&hosts_to_remove); - - return ORTE_SUCCESS; -} - -static int orte_console_display_configuration(orte_console_input_command_t input_command) { - orte_ras_node_t *tmp_host; - opal_list_item_t *item; - int i; - - if ( opal_list_is_empty(&orte_console_hosts) ) { - opal_show_help("help-orteconsole.txt", "orteconsole:no-hosts", false); - return ORTE_SUCCESS; - } - - printf("%6s %15s %10s %13s %15s\n", "Index", - "Hostname", "CPU(s)", - "CPU(s) Used", "Arch"); - for (item = opal_list_get_first(&orte_console_hosts), i = 0; - item != opal_list_get_end( &orte_console_hosts); - item = opal_list_get_next( item), ++i) { - tmp_host = 
(orte_ras_node_t *)item; - printf("%6d %15s %10lu %13lu %15s\n", i, - tmp_host->node_name, (unsigned long)tmp_host->node_slots, - (unsigned long)tmp_host->node_slots_inuse, - tmp_host->node_arch); - } - - return ORTE_SUCCESS; -} - -static int orte_console_boot_daemons(orte_console_input_command_t input_command) { - int rc, id; - orte_ras_node_t *item; - char *remote_daemon; - char *username = NULL; - - if ( opal_list_is_empty(&orte_console_hosts) && 1 >= input_command.argc ) { - opal_show_help("help-orteconsole.txt", "orteconsole:no-hosts", false); - return ORTE_ERROR; - } - - /* If hostname supplied on command line use it */ - if ( 1 < input_command.argc) { - remote_daemon = strdup(input_command.argv[1]); - } - /* Otherwise get first node in list to serve as the primary daemon */ - else { - item = (orte_ras_node_t *)opal_list_get_first(&orte_console_hosts); - remote_daemon = strdup(item->node_name); - } - - printf("Launching Remote Daemon on \"%s\"", remote_daemon); - - /* If they supplied a username then use that, - otherwise assume same username as on the console system */ - if ( 2 < input_command.argc) { - username = strdup(input_command.argv[2]); - printf(" Username \"%s\"\n", username); - } - else { - username = NULL; - printf("\n"); - } - - /* Create the persistent daemon */ - id = mca_base_param_register_int("persistent",NULL,NULL,NULL,(int)false); - mca_base_param_set_int(id, (int)true); - - rc = orte_setup_hnp(NULL, remote_daemon, username); - if ( ORTE_SUCCESS != rc) { - printf("Open RTE Boot: Failed!\n"); - return rc; - } - - printf("Open RTE Boot: Successful!\n"); - daemon_is_active = true; - - return ORTE_SUCCESS; -} - -static int orte_console_halt(orte_console_input_command_t input_command) { - int ret; - - if(!daemon_is_active) { - opal_show_help("help-orteconsole.txt", "orteconsole:no-daemon-started", false); - return ORTE_SUCCESS; - } - - ret = orte_console_send_command(ORTE_DAEMON_EXIT_CMD); - if (ORTE_SUCCESS != ret) { - return ret; - } - - 
return ORTE_SUCCESS; -} - -static int orte_console_exit(orte_console_input_command_t input_command) { - exit_cmd = true; - - return ORTE_SUCCESS; -} - -static int orte_console_help(orte_console_input_command_t input_command) { - orte_console_command_t *cur_cmd; - int i; - - /* - * Generic Help - */ - if ( input_command.argc <= 1 ) { - printf("Open RTE Console Commands:\n\n"); - - for (i = 0; console_commands[i].cmd_type != ORTE_CONSOLE_TYPE_NULL; ++i) { - cur_cmd = &console_commands[i]; - if ( ORTE_CONSOLE_TYPE_HIDDEN != cur_cmd->cmd_type ) { - printf("%15s ", cur_cmd->cmd_full_name); - if ( NULL == cur_cmd->cmd_short_name ) { - printf(" "); - } - else { - printf(" | %5s ", cur_cmd->cmd_short_name); - } - printf("\t%s\n", cur_cmd->cmd_description); - } - } - - printf("\n"); - } - /* - * Specific Help Message for a Command - */ - else { - for(i = 0; console_commands[i].cmd_type != ORTE_CONSOLE_TYPE_NULL; ++i) { - cur_cmd = &console_commands[i]; - - if ( 0 == command_cmp(input_command.argv[1], *cur_cmd) ){ - printf("Command:\n"); - printf("\t%s ", cur_cmd->cmd_full_name); - if ( NULL != cur_cmd->cmd_short_name ) { - printf(" | %5s", cur_cmd->cmd_short_name); - } - printf("\n"); - - if ( NULL != cur_cmd->cmd_usage ) { - printf("Usage:\n"); - printf("\t%s\n", cur_cmd->cmd_usage); - } - - printf("Description:\n"); - printf("\t%s\n", cur_cmd->cmd_description); - - break; - } - } - - /* - * Command Not Found - */ - if( ORTE_CONSOLE_TYPE_NULL == console_commands[i].cmd_type ) { - opal_show_help("help-orteconsole.txt", "orteconsole:unknown-command", false, - input_command.argv[1]); - return ORTE_SUCCESS; - } - - printf("\n"); - } - - return ORTE_SUCCESS; -} - -/* - * Get the contact information for the remote daemon - */ -static int orte_console_contactinfo(orte_console_input_command_t input_command) { - char * str_response; - orte_buffer_t *buffer = NULL; - orte_process_name_t seed={0,0,0}; - int ret; - orte_std_cntr_t n; - - if(!daemon_is_active) { - 
opal_show_help("help-orteconsole.txt", "orteconsole:no-daemon-started", false); - return ORTE_SUCCESS; - } - - /** initialize the buffer */ - buffer = OBJ_NEW(orte_buffer_t); - - /* Start the exchange */ - ret = orte_console_send_command(ORTE_DAEMON_CONTACT_QUERY_CMD); - if (ORTE_SUCCESS != ret ){ - ORTE_ERROR_LOG(ret); - return ret; - } - - ret = orte_rml.recv_buffer(&seed, buffer, ORTE_RML_TAG_DAEMON, 0); - if ( 0 > ret) { - ORTE_ERROR_LOG(ret); - return ret; - } - - n = 1; - ret = orte_dss.unpack(buffer, &str_response, &n, ORTE_STRING); - if ( ORTE_SUCCESS != ret ) { - ORTE_ERROR_LOG(ret); - return ret; - } - - printf(str_response); - printf("\n"); - - /** cleanup the buffer */ - OBJ_RELEASE(buffer); - - return ORTE_SUCCESS; -} - -/* - * Send a command to the remote daemon - */ -static int orte_console_send_command(orte_daemon_cmd_flag_t usercmd) -{ - orte_buffer_t *cmd; - orte_daemon_cmd_flag_t command; - orte_process_name_t seed = {0,0,0}; - int rc; - - if(!daemon_is_active) { - opal_show_help("help-orteconsole.txt", "orteconsole:no-daemon-started", false); - return ORTE_SUCCESS; - } - - cmd = OBJ_NEW(orte_buffer_t); - if (NULL == cmd) { - ORTE_ERROR_LOG(ORTE_ERROR); - return ORTE_ERROR; - } - - command = usercmd; - - rc = orte_dss.pack(cmd, &command, 1, ORTE_DAEMON_CMD); - if ( ORTE_SUCCESS != rc ) { - ORTE_ERROR_LOG(rc); - OBJ_RELEASE(cmd); - return rc; - } - - rc = orte_rml.send_buffer(&seed, cmd, ORTE_RML_TAG_DAEMON, 0); - if ( 0 > rc ) { - ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); - OBJ_RELEASE(cmd); - return ORTE_ERR_COMM_FAILURE; - } - - OBJ_RELEASE(cmd); - - return ORTE_SUCCESS; -} - -static char *orte_console_get_input_line(void) -{ - char *ret, *buff; - char input[ORTE_CONSOLE_MAX_LINE_LENGTH]; - - printf("orteconsole> "); - - ret = fgets(input, ORTE_CONSOLE_MAX_LINE_LENGTH, stdin); - if (NULL != ret) { - input[strlen(input)-1] = '\0'; /* remove newline */ - buff = strdup(input); - return buff; - } - - return NULL; -} diff --git 
a/orte/tools/console/orteconsole.h b/orte/tools/console/orteconsole.h deleted file mode 100644 index b2b233d402..0000000000 --- a/orte/tools/console/orteconsole.h +++ /dev/null @@ -1,119 +0,0 @@ -/* - * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana - * University Research and Technology - * Corporation. All rights reserved. - * Copyright (c) 2004-2005 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, - * University of Stuttgart. All rights reserved. - * Copyright (c) 2004-2005 The Regents of the University of California. - * All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#ifndef ORTECONSOLE_H -#define ORTECONSOLE_H - -#include "orte_config.h" - -#include "opal/class/opal_list.h" -#include "opal/threads/mutex.h" -#include "opal/threads/condition.h" - -#include "orte/mca/odls/odls_types.h" - -#if defined(c_plusplus) || defined(__cplusplus) -extern "C" { -#endif - -#define ORTE_CONSOLE_MAX_LINE_LENGTH 1024 -#define ORTE_CONSOLE_MAX_ARGC 10 - -/* - * Local Structures - */ - -/* Command line Structure */ -typedef struct { - bool help; - char *hostfile; - - opal_mutex_t lock; - opal_condition_t cond; -} orte_console_globals_t; - -/* Console Command Types */ -enum orte_console_type_t { - ORTE_CONSOLE_TYPE_NULL, - - ORTE_CONSOLE_TYPE_STD, - ORTE_CONSOLE_TYPE_HIDDEN -}; -typedef enum orte_console_type_t orte_console_type_t; - -/* Contained parsed user input */ -typedef struct { - /* Command Name */ - char * cmd_name; - - char ** argv; - int argc; -} orte_console_input_command_t; - -/* Structure detailing each command allowed by the console */ -typedef struct { - /* Full Name for the command */ - const char * cmd_full_name; - /* Common abbreviation for this command */ - const char * cmd_short_name; - /* Number of expected additional arguments */ - int cmd_args; - 
/* Type of command */ - orte_console_type_t cmd_type; - /* Pointer to the function to execute */ - int (*cmd_function) (orte_console_input_command_t); - /* Short illustration of how the command should be used */ - const char * cmd_usage; - /* Short description of what this command does */ - const char * cmd_description; -} orte_console_command_t; - -/* Local list of allocated hosts */ -static opal_list_t orte_console_hosts; - -/* - * Function for each command - */ -static int orte_console_exit(orte_console_input_command_t); -static int orte_console_help(orte_console_input_command_t); - -static int orte_console_boot_daemons(orte_console_input_command_t); -static int orte_console_add_host(orte_console_input_command_t); -static int orte_console_remove_host(orte_console_input_command_t); -static int orte_console_display_configuration(orte_console_input_command_t); -static int orte_console_halt(orte_console_input_command_t); - -static int orte_console_contactinfo(orte_console_input_command_t); - -static int orte_console_not_imp(orte_console_input_command_t); -static int orte_console_dump(orte_console_input_command_t); -static int orte_console_ps(orte_console_input_command_t); - -/* - * Support Functions - */ -static char *orte_console_get_input_line(void); -static int orte_console_send_command(orte_daemon_cmd_flag_t usercmd); -static int orte_console_parse_command(char * usercmd, orte_console_input_command_t *input_command); -static int orte_console_execute_command(orte_console_input_command_t command); - -#if defined(c_plusplus) || defined(__cplusplus) -} -#endif - -#endif /* ORTECONSOLE_H */ diff --git a/orte/tools/orte-ps/orte-ps.c b/orte/tools/orte-ps/orte-ps.c index 16a0c5578a..28b7a03be1 100644 --- a/orte/tools/orte-ps/orte-ps.c +++ b/orte/tools/orte-ps/orte-ps.c @@ -665,7 +665,6 @@ static int pretty_print_nodes(opal_list_t *nodes) { int i, line_len; int len_name = 0, len_arch = 0, - len_cell = 0, len_state = 0, len_slots = 0, len_slots_i = 0, @@ -676,7 +675,6 @@ 
static int pretty_print_nodes(opal_list_t *nodes) { */ len_name = (int) strlen("Node Name"); len_arch = (int) strlen("Arch"); - len_cell = (int) strlen("Cell ID"); len_state = (int) strlen("State"); len_slots = (int) strlen("Slots"); len_slots_i = (int) strlen("Slots In Use"); @@ -702,7 +700,6 @@ static int pretty_print_nodes(opal_list_t *nodes) { line_len = (len_name + 3 + len_arch + 3 + - len_cell + 3 + len_state + 3 + len_slots + 3 + len_slots_i + 3 + @@ -713,7 +710,6 @@ static int pretty_print_nodes(opal_list_t *nodes) { */ printf("%*s | ", len_name, "Node Name"); printf("%*s | ", len_arch, "Arch"); - printf("%*s | ", len_cell, "Cell ID"); printf("%*s | ", len_state, "State"); printf("%*s | ", len_slots, "Slots"); printf("%*s | ", len_slots_m, "Slots Max"); @@ -738,7 +734,6 @@ static int pretty_print_nodes(opal_list_t *nodes) { printf("%*s | ", len_arch, (NULL == node->node_arch ? "" : node->node_arch)); - printf("%*d | ", len_cell, node->node_cellid); printf("%*s | ", len_state, pretty_node_state(node->node_state)); printf("%*d | ", len_slots, (uint)node->node_slots); printf("%*d | ", len_slots_m, (uint)node->node_slots_max); @@ -908,7 +903,7 @@ static int pretty_print_vpids(orte_ps_job_info_t *job) { } } - asprintf(&proc_name, "%d.%d.%d", vpid->name.cellid, vpid->name.jobid, vpid->name.vpid); + asprintf(&proc_name, "%d.%d", vpid->name.jobid, vpid->name.vpid); if( (int)strlen(proc_name) > len_o_proc_name ) len_o_proc_name = strlen(proc_name); @@ -983,7 +978,7 @@ static int pretty_print_vpids(orte_ps_job_info_t *job) { printf("\t"); - asprintf(&proc_name, "%d.%d.%d", vpid->name.cellid, vpid->name.jobid, vpid->name.vpid); + asprintf(&proc_name, "%d.%d", vpid->name.jobid, vpid->name.vpid); for( i = 0; i < (int)job->num_app_context; ++i) { if( job->app_context[i]->idx == vpid->app_context_idx ) { @@ -1399,7 +1394,6 @@ static int gather_vpid_info(orte_ps_universe_info_t* universe) { /* * Access the vpid container */ - proc.cellid = 0; proc.jobid = job->id; 
proc.vpid = v; @@ -1461,7 +1455,6 @@ static int gather_vpid_info(orte_ps_universe_info_t* universe) { exit_status = ret; goto cleanup; } - vpid->name.cellid = tmp_proc->cellid; vpid->name.jobid = tmp_proc->jobid; vpid->name.vpid = tmp_proc->vpid; continue; diff --git a/orte/tools/orte-restart/orte-restart.c b/orte/tools/orte-restart/orte-restart.c index 139d562f94..135fa00254 100644 --- a/orte/tools/orte-restart/orte-restart.c +++ b/orte/tools/orte-restart/orte-restart.c @@ -426,8 +426,7 @@ static int create_appfile(orte_snapc_base_global_snapshot_t *snapshot) vpid_snapshot = (orte_snapc_base_snapshot_t*)item; fprintf(appfile, "#\n"); - fprintf(appfile, "# Old Process Name: %u.%u.%u\n", - vpid_snapshot->process_name.cellid, + fprintf(appfile, "# Old Process Name: %u.%u\n", vpid_snapshot->process_name.jobid, vpid_snapshot->process_name.vpid); fprintf(appfile, "#\n");